diff --git a/.clang-format b/.clang-format index 29924594b..ba886a47c 100644 --- a/.clang-format +++ b/.clang-format @@ -86,7 +86,7 @@ IncludeCategories: Priority: 2 - Regex: '^"(tests|bindings)/' Priority: 3 - - Regex: '^"(fmt|igor|fast_float|cxxopts|gtest|gmock|pybind|nvml|rocm_smi|level_zero|subprocess)' + - Regex: '^"(fmt|igor|fast_float|cxxopts|gtest|gmock|pybind|boost|mpi)' Priority: 4 - Regex: '^.*' Priority: 5 diff --git a/.github/workflows/clang_gcc_linux.yml b/.github/workflows/clang_gcc_linux.yml index 1fa8867a8..a57ed8b35 100644 --- a/.github/workflows/clang_gcc_linux.yml +++ b/.github/workflows/clang_gcc_linux.yml @@ -21,6 +21,9 @@ jobs: - name: "Install Compiler" run: | sudo apt install g++ clang libomp-dev + - name: "Install MPI" + run: | + sudo apt install libopenmpi-dev - name: "Install cmake 3.31.0" uses: lukka/get-cmake@v3.31.0 - name: "Clone the PLSSVM repository into PLSSVM/" @@ -34,7 +37,7 @@ jobs: - name: "Configure PLSSVM using CMake" run: | cd PLSSVM - cmake --preset openmp_test -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF + cmake --preset openmp_test -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF - name: "Build PLSSVM" run: | cd PLSSVM @@ -43,4 +46,6 @@ jobs: - name: "Run tests" run: | cd PLSSVM + mkdir tmp + export TMPDIR=$PWD/tmp ctest --preset openmp_test -C ${{ matrix.build_type }} --parallel 2 \ No newline at end of file diff --git a/.github/workflows/clang_macos.yml b/.github/workflows/clang_macos.yml index 
052bdd2a7..6c3269609 100644 --- a/.github/workflows/clang_macos.yml +++ b/.github/workflows/clang_macos.yml @@ -40,7 +40,7 @@ jobs: cd PLSSVM export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" - cmake --preset openmp_test -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF + cmake --preset openmp_test -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=OFF -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF - name: "Build PLSSVM" shell: bash run: | diff --git a/.github/workflows/msvc_windows.yml b/.github/workflows/msvc_windows.yml index f833ad83a..537fb15de 100644 --- a/.github/workflows/msvc_windows.yml +++ b/.github/workflows/msvc_windows.yml @@ -27,7 +27,7 @@ jobs: - name: "Configure PLSSVM using CMake" run: | cd PLSSVM - cmake --preset openmp_test -DCMAKE_CONFIGURATION_TYPES=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 + cmake --preset openmp_test -DCMAKE_CONFIGURATION_TYPES=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=OFF -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 - name: "Build PLSSVM" shell: bash run: | diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index cf8e9d991..7ced58ffa 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -17,6 +17,9 @@ jobs: - name: 
"Install new g++" run: | sudo apt install g++ + - name: "Install MPI (necessary for mpi4py)" + run: | + sudo apt install libopenmpi-dev - name: "Clone the PLSSVM repository into PLSSVM/" uses: actions/checkout@v4.1.1 with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ef583bd8..d43c1f825 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,7 @@ set(PLSSVM_BASE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/kernel_invocation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/stdpar/implementation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/execution_range.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/data_set/min_max_scaler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/parser_predict.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/parser_scale.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/parser_train.cpp @@ -89,10 +90,15 @@ set(PLSSVM_BASE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/data_distribution.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/memory_size.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/sha256.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/string_conversion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/string_utility.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/utility.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/exceptions/exceptions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/exceptions/source_location.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/mpi/detail/information.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/mpi/detail/utility.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/mpi/detail/version.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/mpi/communicator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/mpi/environment.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/svm/csvm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/version/version.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/version/git_metadata/git_metadata.cpp @@ -233,6 +239,38 @@ else () message(WARNING 
"Couldn't find OpenMP. Note that in a multi-GPU setting this will result in serialized kernel calls across all GPUs!") endif () +######################################################################################################################## +# check for MPI (optional) ## +######################################################################################################################## +# check for MPI +set(PLSSVM_ENABLE_MPI AUTO CACHE STRING "Enable distributed memory support via MPI") +set_property(CACHE PLSSVM_ENABLE_MPI PROPERTY STRINGS AUTO ON OFF) +if (PLSSVM_ENABLE_MPI MATCHES "AUTO" OR PLSSVM_ENABLE_MPI) + list(APPEND CMAKE_MESSAGE_INDENT "MPI: ") + message(CHECK_START "Checking for MPI") + + # try finding MPI + find_package(MPI) + + if (MPI_FOUND) + # MPI found + message(CHECK_PASS "found ") + + message(STATUS "Found MPI ${MPI_CXX_VERSION} for distributed memory support.") + set(PLSSVM_FOUND_MPI ON) + target_link_libraries(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC MPI::MPI_CXX) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_HAS_MPI_ENABLED) + else () + # MPI not found + message(CHECK_FAIL "not found") + if (PLSSVM_ENABLE_MPI MATCHES "ON") + message(SEND_ERROR "Cannot find requested MPI!") + endif () + endif () + + list(POP_BACK CMAKE_MESSAGE_INDENT) +endif () + ######################################################################################################################## # create executables # ######################################################################################################################## @@ -260,6 +298,7 @@ list( ######################################################################################################################## # coverage analysis only possible with the Coverage CMAKE_BUILD_TYPE if (uppercase_CMAKE_BUILD_TYPE MATCHES COVERAGE) + list(APPEND CMAKE_MESSAGE_INDENT "coverage: ") # must be linux if (WIN32 OR APPLE) message(FATAL_ERROR "Only Linux is supported for the coverage 
analysis.") @@ -268,31 +307,61 @@ if (uppercase_CMAKE_BUILD_TYPE MATCHES COVERAGE) if (NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU") message(FATAL_ERROR "Only GCC is supported for the coverage analysis.") endif () + message(STATUS "Enable code coverage analysis using lcov and genhtml.") + # tests must be available for a coverage analysis message(STATUS "Enabling tests since they are necessary for the coverage analysis.") set(PLSSVM_ENABLE_TESTING ON CACHE BOOL "" FORCE) + # disable fast-math like it should be in the normal tests + message(STATUS "Disabling fast-math for the tests.") + set(PLSSVM_ENABLE_FAST_MATH OFF CACHE BOOL "" FORCE) # assertions must be available for a coverage analysis message(STATUS "Enabling assertions since they are necessary for the coverage analysis.") set(PLSSVM_ENABLE_ASSERTS ON CACHE BOOL "" FORCE) + # LTO must be disabled for a coverage analysis + message(STATUS "Disabling LTO since it may interfere with the coverage analysis.") + set(PLSSVM_ENABLE_LTO OFF CACHE BOOL "" FORCE) + + # also enable code coverage for nvcc + set(CMAKE_CUDA_FLAGS "-Xcompiler '-O0 -g --coverage -lgcov'") + + # check that the necessary executables are available + find_program(PLSSVM_LCOV lcov REQUIRED) + find_program(PLSSVM_GENHTML genhtml REQUIRED) + find_program(PLSSVM_CPPFILT c++filt) + if (PLSSVM_CPPFILT) + set(PLSSVM_DEMANGLE_USING_CPPFILT --demangle-cpp) + endif () - message(STATUS "Enable code coverage analysis using lcov.") - - # Create the coverage target. Run coverage tests with 'make coverage' + # Create the coverage target. Run coverage tests with 'ctest --build . --target coverage' + set(PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY "coverage_report") add_custom_target( coverage - COMMAND lcov --zerocounters --directory . - COMMAND lcov --capture -d . --initial --output-file test_base.info - COMMAND mkdir -p coverage - COMMAND ${CMAKE_MAKE_PROGRAM} test || true - COMMAND lcov --capture -d . 
--output-file test_test.info - COMMAND lcov --add-tracefile test_base.info --add-tracefile test_test.info -o test_total.info - COMMAND lcov --remove test_total.info '/usr/*' '*/build/*' '*/tests/*' '*/_deps/*' -o test_clean.info - COMMAND genhtml test_clean.info --output-directory coverage --title "PLSSVM Test Coverage" --show-details --legend - BYPRODUCTS ${CMAKE_BINARY_DIR}/test_base.info - ${CMAKE_BINARY_DIR}/test_test.info - ${CMAKE_BINARY_DIR}/test_total.info - ${CMAKE_BINARY_DIR}/test_clean.info - ${CMAKE_BINARY_DIR}/coverage + COMMENT "Running tests and generating coverage report..." + # Cleanup previous coverage data + COMMAND ${PLSSVM_LCOV} --directory . --zerocounters + COMMAND ${CMAKE_COMMAND} -E remove -f coverage.info coverage_init.info coverage_tests.info "${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY}" + # Capture initial zero coverage baseline (useful for untouched files) + COMMAND ${PLSSVM_LCOV} --directory . --capture --initial --rc geninfo_unexecuted_blocks=1 --include '*/PLSSVM/src/*' --include '*/PLSSVM/include/*' + --output-file coverage_init.info + # Be sure the output directory exists + COMMAND mkdir -p "${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY}" + # Run tests + COMMAND ${CMAKE_CTEST_COMMAND} --parallel 8 || true + # Capture coverage info + COMMAND ${PLSSVM_LCOV} --directory . 
--capture --rc geninfo_unexecuted_blocks=1 --include '*/PLSSVM/src/*' --include '*/PLSSVM/include/*' --output-file + coverage_tests.info + # Combine coverage files (in case of multiple test runs) + COMMAND ${PLSSVM_LCOV} --add-tracefile coverage_init.info --add-tracefile coverage_tests.info --output-file coverage.info + # Generate HTML report + COMMAND ${PLSSVM_GENHTML} coverage.info --output-directory "${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY}" --title "PLSSVM Test Coverage" --show-details + --legend --frames ${PLSSVM_DEMANGLE_USING_CPPFILT} + # Summary message + COMMAND ${CMAKE_COMMAND} -E echo "Coverage report generated in: ${CMAKE_BINARY_DIR}/${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY}" + # Specify byproducts for CMake + BYPRODUCTS "${CMAKE_BINARY_DIR}/coverage_init.info" "${CMAKE_BINARY_DIR}/coverage_tests.info" "${CMAKE_BINARY_DIR}/coverage.info" + "${CMAKE_BINARY_DIR}/${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY}" + # Set the working directory WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) @@ -307,8 +376,10 @@ if (uppercase_CMAKE_BUILD_TYPE MATCHES COVERAGE) DEPENDS clean COMMENT "remove all coverage files" COMMAND ${CMAKE_MAKE_PROGRAM} clean + COMMAND ${CMAKE_COMMAND} -E remove -f coverage.info coverage_init.info coverage_tests.info ${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY} COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/delete_coverage_files.cmake" TARGET clean_coverage ) + list(POP_BACK CMAKE_MESSAGE_INDENT) endif () ######################################################################################################################## @@ -544,7 +615,7 @@ endif () # check for Link Time Optimization # ######################################################################################################################## # enable Link Time Optimization (LTO) -option(PLSSVM_ENABLE_LTO "Enable Link Time Optimizations." ON) +option(PLSSVM_ENABLE_LTO "Enable Link Time Optimizations." 
OFF) if (PLSSVM_ENABLE_LTO) include(CheckIPOSupported) check_ipo_supported(RESULT PLSSVM_LTO_SUPPORTED LANGUAGES CXX) @@ -590,15 +661,19 @@ else () target_include_directories(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $ $) endif () -# try finding fast_float -set(PLSSVM_fast_float_VERSION v6.1.3) -find_package(fast_float QUIET) +# ~~~ +# try finding fast_float note: We have to wrap fast_float into a small wrapper library to ensure that fast-math is never enabled. +# Otherwise, the fast_float usage of std::numeric_limits<>::infinity() results in UB... +# ~~~ +set(PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME plssvm-fast_float-wrapper) +add_library(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/fast_float_wrapper.cpp) +set(PLSSVM_fast_float_VERSION v8.0.2) +find_package(fast_float 8.0.0 QUIET) if (fast_float_FOUND) message(STATUS "Found package fast_float.") - target_include_directories(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC ${fast_float_INCLUDE_DIR}) + target_include_directories(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PUBLIC ${fast_float_INCLUDE_DIR}) else () message(STATUS "Couldn't find package fast_float. 
Building version ${PLSSVM_fast_float_VERSION} from source.") - target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_fast_float_VERSION="${PLSSVM_fast_float_VERSION}") # set options for fast_float set(FASTFLOAT_TEST OFF CACHE INTERNAL "" FORCE) set(FASTFLOAT_SANITIZE OFF CACHE INTERNAL "" FORCE) @@ -610,9 +685,26 @@ else () QUIET ) FetchContent_MakeAvailable(fast_float) - add_dependencies(${PLSSVM_BASE_LIBRARY_NAME} fast_float) - target_include_directories(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $ $) + add_dependencies(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} fast_float) + target_include_directories( + ${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PUBLIC $ $ + ) endif () +# ensure that fast-math is ALWAYS off +target_compile_options( + ${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PRIVATE $<$:-fno-fast-math> + $<$:/fp:precise> +) +target_include_directories( + ${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PRIVATE $ $ +) +target_compile_features(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PUBLIC cxx_std_17) +if (PLSSVM_ENABLE_STL_DEBUG_MODE) + target_compile_definitions(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PUBLIC ${PLSSVM_STL_DEBUG_MODE_FLAGS}) +endif () +# link wrapper library against base library +target_link_libraries(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC ${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME}) +list(APPEND PLSSVM_TARGETS_TO_INSTALL "${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME}") # try finding igor set(PLSSVM_igor_VERSION a5224c60d266974d3f407191583fe266cbe1c93d) @@ -881,7 +973,7 @@ write_basic_package_version_file( COMPATIBILITY SameMajorVersion ) -# generate configuration file +# generate configuration files configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmConfig.cmake.in" "${PROJECT_BINARY_DIR}/plssvmConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/plssvm/cmake @@ -890,6 +982,15 @@ configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHIPTargets.cmake.in" 
"${PROJECT_BINARY_DIR}/plssvmHIPTargets.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/plssvm/cmake ) +configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmAdaptiveCppTargets.cmake.in" "${PROJECT_BINARY_DIR}/plssvmAdaptiveCppTargets.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/plssvm/cmake +) +string(REPLACE "\"" "\\\"" PLSSVM_ESCAPED_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # necessary to correctly escape quotes +configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmstdparTargets.cmake.in" "${PROJECT_BINARY_DIR}/plssvmstdparTargets.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/plssvm/cmake +) # create and copy install-targets file install( @@ -908,9 +1009,9 @@ install( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenCLTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenMPTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHPXTargets.cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmAdaptiveCppTargets.cmake" + "${PROJECT_BINARY_DIR}/plssvmAdaptiveCppTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmDPCPPTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmKokkosTargets.cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmstdparTargets.cmake" + "${PROJECT_BINARY_DIR}/plssvmstdparTargets.cmake" DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/plssvm/cmake ) diff --git a/CMakePresets.json b/CMakePresets.json index e226c44fd..3f760f34a 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -14,6 +14,7 @@ "cmake/presets/opencl.json", "cmake/presets/acpp.json", "cmake/presets/dpcpp.json", + "cmake/presets/icpx.json", "cmake/presets/kokkos.json", "cmake/presets/all.json" ] diff --git a/README.md b/README.md index 532550a93..797ba4133 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ - [Training using `plssvm-train`](#training-using-plssvm-train) - [Predicting using `plssvm-predict`](#predicting-using-plssvm-predict) - [Data 
Scaling using `plssvm-scale`](#data-scaling-using-plssvm-scale) + - [Distributed Memory Support via MPI](#distributed-memory-support-via-mpi) - [Example Code for PLSSVM Used as a Library](#example-code-for-plssvm-used-as-a-library) - [Example Using the `sklearn` Python Bindings Available For PLSSVM](#example-using-the-sklearn-like-python-bindings-available-for-plssvm) - [Citing PLSSVM](#citing-plssvm) @@ -58,15 +59,22 @@ The main highlights of our SVM implementations are: 1. Drop-in replacement for LIBSVM's `svm-train`, `svm-predict`, and `svm-scale` (some features currently not implemented). 2. Support of multiple different programming frameworks for parallelization (also called backends in our PLSSVM implementation) which allows us to target GPUs and CPUs from different vendors like NVIDIA, AMD, or Intel: - [OpenMP](https://www.openmp.org/) - - [HPX](https://hpx.stellar-group.org/) - - [stdpar](https://en.cppreference.com/w/cpp/algorithm) (supported implementations are [nvc++](https://developer.nvidia.com/hpc-sdk) from NVIDIA's HPC SDK, [roc-stdpar](https://github.com/ROCm/roc-stdpar) as a patched LLVM, [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) as Intel's oneAPI compiler, [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), and [GNU GCC](https://gcc.gnu.org/) using TBB).
+ - [HPX](https://hpx.stellar-group.org/) (tested with current master) + - C++ 17's standard parallelism [stdpar](https://en.cppreference.com/w/cpp/algorithm):
**Note**: due to the nature of the used USM mechanics in the `stdpar` implementations, the `stdpar` backend **can't** be enabled together with **any** other backend!
- **Note**: since every translation units need to be compiled with the same flag, we currently globally set `CMAKE_CXX_FLAGS` although it's discouraged in favor of `target_compile_options`. - - [CUDA](https://developer.nvidia.com/cuda-zone) - - [HIP](https://github.com/ROCm-Developer-Tools/HIP) - - [OpenCL](https://www.khronos.org/opencl/) - - [SYCL](https://www.khronos.org/sycl/) (supported implementations are Intel's [DPC++/icpx](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL); specifically the versions [intel-oneapi-compilers@2025.0.0](https://github.com/spack/spack) (via spack) and AdaptiveCpp release [v24.06.0](https://github.com/AdaptiveCpp/AdaptiveCpp/releases/tag/v23.10.0)) - - [Kokkos](https://github.com/kokkos/kokkos) (all execution spaces supported except `OpenMPTarget` and `OpenACC`); specifically the version [4.5.00](https://github.com/kokkos/kokkos/releases/tag/4.5.00) + **Note**: since every translation unit needs to be compiled with the same flag, we currently globally set `CMAKE_CXX_FLAGS` although it's discouraged. 
+ - [nvc++](https://developer.nvidia.com/hpc-sdk) from NVIDIA's HPC SDK (tested with version [25.3](https://docs.nvidia.com/hpc-sdk/hpc-sdk-release-notes/index.html)) + - [roc-stdpar](https://github.com/ROCm/roc-stdpar) merged into upstream LLVM starting with version 18 (tested with version [18](https://releases.llvm.org/)) + - [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) as Intel's oneAPI compiler (tested with version [2025.0.0](https://www.intel.com/content/www/us/en/developer/articles/release-notes/oneapi-dpcpp/2025.html)) + - [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (tested with version [v24.10.0](https://github.com/AdaptiveCpp/AdaptiveCpp/releases/tag/v24.10.0)) + - [GNU GCC](https://gcc.gnu.org/) using TBB (tested with version GCC [14.2.0](https://gcc.gnu.org/onlinedocs/14.2.0/)) + - [CUDA](https://developer.nvidia.com/cuda-zone) (tested with version [12.6.3](https://developer.nvidia.com/cuda-12-6-3-download-archive)) + - [HIP](https://github.com/ROCm-Developer-Tools/HIP) (tested with version [6.3.3](https://rocm.docs.amd.com/projects/HIP/en/docs-6.3.3/)) + - [OpenCL](https://www.khronos.org/opencl/) (tested with CUDA and ROCm provided OpenCL implementations as well as [PoCL](https://github.com/pocl/pocl) version [v6.0](https://github.com/pocl/pocl/releases/tag/v6.0)) + - [SYCL](https://www.khronos.org/sycl/): + - [DPC++/icpx](https://github.com/intel/llvm) as Intel's oneAPI compiler (tested with version [2025.0.0](https://www.intel.com/content/www/us/en/developer/articles/release-notes/oneapi-dpcpp/2025.html)) + - [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), formerly known as hipSYCL (tested with version [v24.10.0](https://github.com/AdaptiveCpp/AdaptiveCpp/releases/tag/v24.10.0)) + - [Kokkos](https://github.com/kokkos/kokkos) (all execution spaces supported except `OpenMPTarget` and `OpenACC`) (tested with version [4.6.00](https://github.com/kokkos/kokkos/releases/tag/4.6.00)) 3. 
Six different kernel functions to be able to classify a large variety of different problems: - linear: $\vec{u}^T$ $\cdot$ $\vec{v}$ - polynomial: $(\gamma$ $\cdot$ $\vec{u}^T$ $\cdot$ $\vec{v}$ $+$ $coef0)^{d}$ @@ -82,7 +90,8 @@ The main highlights of our SVM implementations are: - OAO: constructs many but smaller binary classifications. The resulting model file is **fully** compatible with LIBSVM. 6. Also, support for the regression task. 7. Multi-GPU support for **all** kernel functions and GPU backends for `fit` as well as `predict/score` (**note**: no multi-GPU support for the stdpar backend even if run on a GPU!). -8. Python bindings as drop-in replacement for `sklearn.SVC` and `sklearn.SVR` (some features currently not implemented). +8. Distributed memory support via [MPI](https://www.mpi-forum.org/) for all backends. +9. Python bindings as drop-in replacement for `sklearn.SVC` and `sklearn.SVR` (some features currently not implemented). To see the full power of Support Vector Machines, have a look at our live visualization examples in [examples/python/interactive](examples/python/interactive/README.md). @@ -95,11 +104,12 @@ General dependencies: - a C++17 capable compiler (e.g. 
[`gcc`](https://gcc.gnu.org/) or [`clang`](https://clang.llvm.org/)) - [CMake](https://cmake.org/) 3.25 or newer -- [cxxopts ≥ v3.2.0](https://github.com/jarro2783/cxxopts), [fast_float ≥ v6.1.3](https://github.com/fastfloat/fast_float), [{fmt} ≥ v11.0.2](https://github.com/fmtlib/fmt), and [igor](https://github.com/bluescarni/igor) (all four are automatically build during the CMake configuration if they couldn't be found using the respective `find_package` call) -- [GoogleTest ≥ v1.15.2](https://github.com/google/googletest) if testing is enabled (automatically build during the CMake configuration if `find_package(GTest)` wasn't successful) +- [cxxopts ≥ v3.2.0](https://github.com/jarro2783/cxxopts), [fast_float ≥ v8.0.2](https://github.com/fastfloat/fast_float), [{fmt} ≥ v11.0.2](https://github.com/fmtlib/fmt), and [igor](https://github.com/bluescarni/igor) (all four are automatically build during the CMake configuration if they couldn't be found using the respective `find_package` call) +- [GoogleTest ≥ v1.16.0](https://github.com/google/googletest) if testing is enabled (automatically build during the CMake configuration if `find_package(GTest)` wasn't successful) - [doxygen](https://www.doxygen.nl/index.html) if documentation generation is enabled -- [Pybind11 ≥ v2.13.3](https://github.com/pybind/pybind11) if Python bindings are enabled +- [Pybind11 ≥ v2.13.6](https://github.com/pybind/pybind11) if Python bindings are enabled - [OpenMP](https://www.openmp.org/) 4.0 or newer (optional) to speed-up library utilities (like file parsing) +- [MPI](https://www.mpi-forum.org/) if distributed memory systems should be supported; [mpi4py](https://mpi4py.readthedocs.io/en/stable/) to enable interoperability in our Python bindings - [Format.cmake](https://github.com/TheLartians/Format.cmake) if auto formatting via cmake-format and clang-format is enabled; also requires at least clang-format-18 and git, additionally, needs our custom [cmake-format 
fork](https://github.com/vancraar/cmake_format) incorporating some patches - multiple Python modules used in the utility scripts, to install all modules use `pip install --user -r install/python_requirements.txt` @@ -113,12 +123,12 @@ Additional dependencies for the stdpar backend: Additional dependencies for the HPX backend: -- [HPX ≥ v1.9.0](https://hpx.stellar-group.org/) +- [HPX @ current master](https://hpx.stellar-group.org/) Additional dependencies for the CUDA backend: - CUDA SDK -- either NVIDIA [`nvcc`](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html) or [`clang` with CUDA support enabled](https://llvm.org/docs/CompileCudaWithLLVM.html) +- either NVIDIA [`nvcc`](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html), [`nvc++`](https://developer.nvidia.com/hpc-sdk) or [`clang` with CUDA support enabled](https://llvm.org/docs/CompileCudaWithLLVM.html) Additional dependencies for the HIP backend: @@ -128,6 +138,7 @@ Additional dependencies for the HIP backend: Additional dependencies for the OpenCL backend: - OpenCL runtime and header files +- e.g., the CUDA or ROCm provided OpenCL runtimes or [PoCL](https://github.com/pocl/pocl) Additional dependencies for the SYCL backend: @@ -232,11 +243,6 @@ python3 utility_scripts/plssvm_target_platforms.py --quiet cpu:avx512;nvidia:sm_86 ``` -If the architectural information for the requested GPU could not be retrieved, one option would be to have a look at: - -- for Intel GPUs and CPUs: [Ahead of Time Compilation](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html) and [Intel graphics processor table](https://dgpu-docs.intel.com/devices/hardware-table.html) - - #### Optional CMake Options The `[optional_options]` can be one or multiple of: @@ -283,12 +289,17 @@ The `[optional_options]` can be one or multiple of: **Attention:** at least one backend must be enabled and available! 
+- `PLSSVM_ENABLE_MPI=ON|OFF|AUTO` (default: `AUTO`): + - `ON`: check for MPI and fail if not available + - `AUTO`: check for MPI but **do not** fail if not available + - `OFF`: do not check for MPI + - `PLSSVM_ENABLE_FAST_MATH=ON|OFF` (default depending on `CMAKE_BUILD_TYPE`: `ON` for Release or RelWithDebInfo, `OFF` otherwise): enable `fast-math` compiler flags for all backends - `PLSSVM_ENABLE_ASSERTS=ON|OFF` (default: `OFF`): enables custom assertions - `PLSSVM_USE_FLOAT_AS_REAL_TYPE=ON|OFF` (default: `OFF`): use `float` as real_type instead of `double` - `PLSSVM_THREAD_BLOCK_SIZE` (default: `8`): set a specific thread block size used in the GPU kernels (for fine-tuning optimizations) - `PLSSVM_INTERNAL_BLOCK_SIZE` (default: `4`): set a specific internal block size used in the GPU kernels (for fine-tuning optimizations) -- `PLSSVM_ENABLE_LTO=ON|OFF` (default: `ON`): enable interprocedural optimization (IPO/LTO) if supported by the compiler +- `PLSSVM_ENABLE_LTO=ON|OFF` (default: `OFF`): enable interprocedural optimization (IPO/LTO) if supported by the compiler - `PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE=ON|OFF` (default: `ON`): enforce the maximum (device) memory allocation size for the plssvm::solver_type::automatic solver - `PLSSVM_ENABLE_DOCUMENTATION=ON|OFF` (default: `OFF`): enable the `doc` target using doxygen - `PLSSVM_ENABLE_PERFORMANCE_TRACKING=ON|OFF` (default: `OFF`): enable gathering performance characteristics for the three executables using YAML files; example Python3 scripts to perform performance measurements and to process the resulting YAML files can be found in the `utility_scripts/` directory (requires the Python3 modules [wrapt-timeout-decorator](https://pypi.org/project/wrapt-timeout-decorator/), [`pyyaml`](https://pyyaml.org/), and [`pint`](https://pint.readthedocs.io/en/stable/)) @@ -313,7 +324,6 @@ If `PLSSVM_ENABLE_PERFORMANCE_TRACKING` is set to `ON`, the following option can If `PLSSVM_ENABLE_HARDWARE_SAMPLING` is set to `ON`, the 
following options can also be set: -- `PLSSVM_HARDWARE_SAMPLING_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable some runtime error checks for the hardware sampling libraries - `PLSSVM_HARDWARE_SAMPLING_INTERVAL` (default: `100`): the sampling interval for the `plssvm-train`, `plssvm-predict`, and `plssvm-scale` executables in **milliseconds** If `PLSSVM_ENABLE_LANGUAGE_BINDINGS` is set to `ON`, the following option can also be set: @@ -340,9 +350,7 @@ To use DPC++/icpx for SYCL, simply set the `CMAKE_CXX_COMPILER` to the respectiv If the SYCL implementation is DPC++/icpx the following additional options are available: -- `PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT` (default: `ON`): enable Ahead-of-Time (AOT) compilation for the specified target platforms - `PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO` (default: `ON`): use DPC++/icpx's Level-Zero backend instead of its OpenCL backend **(only available if a CPU or Intel GPU is targeted)** -- `PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_USE_HIP` (default: `ON`): use DPC++/icpx's HIP backend instead of its OpenCL backend for AMD GPUs **(only available if an AMD GPU is targeted)** If the SYCL implementation is AdaptiveCpp the following additional option is available: @@ -353,14 +361,25 @@ If more than one SYCL implementation is available the environment variables `PLS - `PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` (`dpcpp`|`adaptivecpp`): specify the preferred SYCL implementation if the `sycl_implementation_type` option is set to `automatic`; additional the specified SYCL implementation is used in the `plssvm::sycl` namespace, the other implementations are available in the `plssvm::dpcpp` and `plssvm::adaptivecpp` namespace respectively -If the Kokkos backend is available the following additional option is available (**note**: this option takes only effect if the Kokkos SYCL execution space is available): +If the Kokkos backend is available, an additional option can be set. 
-- `PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT` (default: `ON`): enable Ahead-of-Time (AOT) compilation for the specified target platforms +- `PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU` (default: `OFF`): enable multi-GPU support for the Kokkos::SYCL execution space; broken in Kokkos as of version 4.6.00! -If the stdpar backend is available, an additional options can be set. +If the stdpar backend is available, an additional option can be set. - `PLSSVM_STDPAR_BACKEND_IMPLEMENTATION` (default: `AUTO`): explicitly specify the used stdpar implementation; must be one of: `AUTO`, `NVHPC`, `roc-stdpar`, `IntelLLVM`, `ACPP`, `GNU_TBB`. +If the stdpar implementation is AdaptiveCpp, the following additional option is available: + +- `PLSSVM_STDPAR_BACKEND_ACPP_USE_GENERIC_SSCP` (default: `ON`): use AdaptiveCpp's new SSCP compilation flow + +If the stdpar implementation is roc-stdpar, the following additional option is available: + +- `PLSSVM_STDPAR_BACKEND_ROCSTDPAR_USE_INTERPOSE_ALLOC=ON|OFF|AUTO` (default: `AUTO`): + - `ON`: always set the `--hipstdpar-interpose-alloc` compiler flag + - `AUTO`: only set the `--hipstdpar-interpose-alloc` compiler flag if the environment variable `HSA_XNACK` is not defined or set to `0` + - `OFF`: never set the `--hipstdpar-interpose-alloc` compiler flag + #### CMake presets We also provide a number of basic CMake presets. We currently have `configure`, `build`, `test`, and `workflow` presets. 
@@ -406,9 +425,12 @@ Available configure presets: "acpp" - AdaptiveCpp SYCL backend "acpp_python" - AdaptiveCpp SYCL backend + Python bindings "acpp_test" - AdaptiveCpp SYCL backend tests - "dpcpp" - DPC++/icpx SYCL backend - "dpcpp_python" - DPC++/icpx backend + Python bindings - "dpcpp_test" - DPC++/icpx backend tests + "dpcpp" - DPC++ SYCL backend + "dpcpp_python" - DPC++ backend + Python bindings + "dpcpp_test" - DPC++ backend tests + "icpx" - icpx SYCL backend + "icpx_python" - icpx backend + Python bindings + "icpx_test" - icpx backend tests "kokkos" - Kokkos backend "kokkos_python" - Kokkos backend + Python bindings "kokkos_test" - Kokkos backend tests @@ -428,6 +450,8 @@ However, these additional options can be enabled using normal CMake options. **Note**: the `all` presets always exclude the `stdpar` backend since it is currently not supported to enable them with any other backend. +**Note**: the only difference between the dpcpp and icpx presets is the automatically set `CMAKE_CXX_COMPILER`. Internally, both presets use the same SYCL implementation. + ### Running the Tests To run the tests after building the library (with `PLSSVM_ENABLE_TESTING` set to `ON`) use: @@ -444,6 +468,8 @@ ctest **Note:** the stdpar tests may fail if executed in parallel via `ctest -j $(nproc)`. +**Note:** our tests do not support the execution with more than one MPI process launched via `mpirun`. + ### Generating Test Coverage Results To enable the generation of test coverage reports using `locv` the library must be compiled using the custom `Coverage` `CMAKE_BUILD_TYPE`. 
@@ -665,6 +691,8 @@ Usage: choose the Kokkos execution space to be used in the Kokkos backend: automatic|Cuda|OpenMP|Serial (default: automatic) --performance_tracking arg the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr + --mpi_load_balancing_weights arg + can be used to load balance for MPI (must be integers); number of provided values must match the number of MPI ranks --use_strings_as_labels use strings as labels instead of plane numbers --verbosity choose the level of verbosity: full|timing|libsvm|quiet (default: full) -q, --quiet quiet mode (no outputs regardless the provided verbosity level!) @@ -771,6 +799,8 @@ Usage: choose the Kokkos execution space to be used in the Kokkos backend: automatic|Cuda|OpenMP|Serial (default: automatic) --performance_tracking arg the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr + --mpi_load_balancing_weights arg + can be used to load balance for MPI (must be integers); number of provided values must match the number of MPI ranks --use_strings_as_labels use strings as labels instead of plane numbers --verbosity choose the level of verbosity: full|timing|libsvm|quiet (default: full) -q, --quiet quiet mode (no outputs regardless the provided verbosity level!) @@ -834,6 +864,37 @@ An example invocation to scale a train and test file in the same way looks like: ./plssvm-scale -r scaling_parameter.txt test_file.libsvm test_file_scaled.libsvm ``` +### Distributed Memory Support via MPI + +We support distributed memory via MPI for `plssvm-train` and `plssvm-predict` while simultaneously allowing multiple devices per MPI rank. +In order to use it, MPI must be found during the CMake configuration step. +Note that if MPI couldn't be found, PLSSVM still works in shared memory mode only and internally disables all MPI related functionality. 
+For example, to run PLSSVM via MPI on four nodes simply use the normal `mpirun` command: + +```bash +mpirun -N 4 ./plssvm-train --backend cuda --input /path/to/data_file +``` + +We also have support for a rudimentary, manual load balancing: + +```bash +mpirun -N 4 ./plssvm-train --mpi_load_balancing_weights=1,2,2,1 --backend cuda --input /path/to/data_file +``` + +The above command results in MPI ranks 1 and 2 computing twice as many matrix elements as ranks 0 and 3. +This can be used to load balance our computations in scenarios where heterogeneous hardware is used. +Note that the number of provided load balancing weights must be equal to the number of used MPI ranks and is independent of the number of devices per MPI rank. +If one MPI rank has more than one device, all these devices on one MPI rank compute the same number of matrix elements. + +Our MPI implementation, however, currently has some limitations: +- the training, test, and model data is fully read by **every** MPI rank +- the training, test, and model data is fully stored on **each** compute device on **every** MPI rank +- **only** the kernel matrix is really divided across **all** MPI ranks +- while the expensive BLAS level 3 operations in the CG algorithm are computed in a distributed way, everything else is computed on **every** MPI rank +- in the CG algorithm we communicate the whole matrix, although it would be sufficient to communicate only matrix parts +- **only** the **main** MPI rank (per default rank 0) writes the output files +- `plssvm-scale` **does not** support more than one MPI rank + ### Example Code for PLSSVM Used as a Library A simple C++ program (`main_classification.cpp`) using PLSSVM as library for classification could look like: @@ -919,9 +980,9 @@ int main() { std::cout << "model accuracy: " << model_accuracy << std::endl; // predict the labels - const std::vector predicted_values = svc->predict(model, test_data); + const std::vector predicted_values = svc->predict(model, test_data); // 
output a more complete regression report - const std::vector &correct_values = test_data.labels().value(); + const std::vector &correct_values = test_data.labels().value(); std::cout << plssvm::regression_report{ correct_label, predicted_label } << std::endl; // write model file to disk @@ -936,27 +997,40 @@ int main() { } ``` +The `examples/cpp` directory also contains the same examples using MPI to support distributed memory systems. + With a corresponding minimal CMake file: ```cmake cmake_minimum_required(VERSION 3.25) -project(LibraryUsageExample - LANGUAGES CXX) +project(LibraryUsageExample LANGUAGES CXX) -find_package(plssvm REQUIRED) +find_package(plssvm CONFIG REQUIRED) # CMake's COMPONENTS mechanism can also be used if a specific library component is required, e.g.: # find_package(plssvm REQUIRED COMPONENTS CUDA) +# classification executable example add_executable(classification main_classification.cpp) +# classification executable example using MPI +add_executable(classification_mpi main_classification_mpi.cpp) +# regression executable example add_executable(regression main_regression.cpp) - -target_compile_features(prog PUBLIC cxx_std_17) -target_link_libraries(prog PUBLIC plssvm::all) -# can also only link against a single library component, e.g.: -# target_link_libraries(prog PUBLIC plssvm::cuda) +# regression executable example using MPI +add_executable(regression_mpi main_regression_mpi.cpp) + +# link PLSSVM against executables +foreach (target classification classification_mpi regression regression_mpi) + target_compile_features(${target} PUBLIC cxx_std_17) + target_link_libraries(${target} PUBLIC plssvm::plssvm) + # can also only link against a single library component, e.g.: + # target_link_libraries(${target} PUBLIC plssvm::cuda) +endforeach () ``` +The `examples/python` directory contains the same examples using our PLSSVM Python bindings. +Additionally, it contains Python examples leveraging MPI to target distributed memory systems. 
+ ### Example Using the `sklearn` like Python Bindings Available For PLSSVM A classification example using PLSSVM's `SVC` Python binding and sklearn's breast cancer data set: @@ -977,7 +1051,7 @@ import sklearn.datasets import sklearn.metrics import sklearn.inspection import numpy as np -from plssvm import SVC # identical to from sklearn.svm import SVC +from plssvm.svm import SVC # identical to from sklearn.svm import SVC # load the breast cancer datasets cancer = sklearn.datasets.load_breast_cancer() @@ -1076,7 +1150,7 @@ y_rbf_sklearn = sklearn_svr_rbf.fit(X, y).predict(X) plt.plot(X, y_rbf_sklearn, lw=2, linestyle='dashed', label='RBF model sklearn') # fit the PLSSVM regression model -from plssvm import SVR +from plssvm.svm import SVR plssvm_svr_lin = SVR(kernel='linear', C=100) y_lin_plssvm = plssvm_svr_lin.fit(X, y).predict(X) diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt index 54f967f2a..19e8777c1 100644 --- a/bindings/Python/CMakeLists.txt +++ b/bindings/Python/CMakeLists.txt @@ -11,8 +11,8 @@ message(STATUS "Building Python language bindings for PLSSVM.") find_package(Python COMPONENTS Interpreter Development) # try finding pybind11 -set(PLSSVM_pybind11_VERSION v2.13.3) -find_package(pybind11 2.13.3 QUIET) +set(PLSSVM_pybind11_VERSION v2.13.6) +find_package(pybind11 2.13.6 QUIET) if (pybind11_FOUND) message(STATUS "Found package pybind11.") else () @@ -31,30 +31,29 @@ endif () # set source files that are always used set(PLSSVM_PYTHON_BINDINGS_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/data_set/classification_data_set.cpp + ${CMAKE_CURRENT_LIST_DIR}/data_set/min_max_scaler.cpp + ${CMAKE_CURRENT_LIST_DIR}/data_set/regression_data_set.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions/exceptions.cpp - ${CMAKE_CURRENT_LIST_DIR}/version/version.cpp - ${CMAKE_CURRENT_LIST_DIR}/backend_types.cpp - ${CMAKE_CURRENT_LIST_DIR}/classification_types.cpp + ${CMAKE_CURRENT_LIST_DIR}/model/classification_model.cpp + 
${CMAKE_CURRENT_LIST_DIR}/model/regression_model.cpp + ${CMAKE_CURRENT_LIST_DIR}/sklearn_like/svc.cpp + ${CMAKE_CURRENT_LIST_DIR}/sklearn_like/svr.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvc.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvr.cpp - ${CMAKE_CURRENT_LIST_DIR}/data_set/classification_data_set.cpp - ${CMAKE_CURRENT_LIST_DIR}/data_set/min_max_scaler.cpp - ${CMAKE_CURRENT_LIST_DIR}/data_set/regression_data_set.cpp + ${CMAKE_CURRENT_LIST_DIR}/backend_types.cpp + ${CMAKE_CURRENT_LIST_DIR}/classification_types.cpp ${CMAKE_CURRENT_LIST_DIR}/file_format_types.cpp ${CMAKE_CURRENT_LIST_DIR}/gamma.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_function_types.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_functions.cpp - ${CMAKE_CURRENT_LIST_DIR}/model/classification_model.cpp - ${CMAKE_CURRENT_LIST_DIR}/model/regression_model.cpp ${CMAKE_CURRENT_LIST_DIR}/parameter.cpp ${CMAKE_CURRENT_LIST_DIR}/regression_report.cpp ${CMAKE_CURRENT_LIST_DIR}/solver_types.cpp ${CMAKE_CURRENT_LIST_DIR}/svm_types.cpp ${CMAKE_CURRENT_LIST_DIR}/target_platforms.cpp ${CMAKE_CURRENT_LIST_DIR}/verbosity_levels.cpp - ${CMAKE_CURRENT_LIST_DIR}/sklearn_svc.cpp - ${CMAKE_CURRENT_LIST_DIR}/sklearn_svr.cpp ${CMAKE_CURRENT_LIST_DIR}/main.cpp ) @@ -119,15 +118,16 @@ target_compile_definitions(${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE PYBIND target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$:-Wno-self-assign-overloaded>) target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC -fPIC) -# append pybind11 bindings library to installed targets -append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME}) +include(GNUInstallDirs) +# install Python library +install(TARGETS ${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files +) # install necessary Python files to make pip install plssvm work correctly: # # - __init__.py: PLSSVM is correctly recognized as Python package # - __cli__.py: PLSSVM's 
executables are correctly usable # - __install_check__.py: custom script outputting some PLSSVM build information -include(GNUInstallDirs) install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/__init__.py" "${CMAKE_CURRENT_SOURCE_DIR}/__cli__.py" "${CMAKE_CURRENT_SOURCE_DIR}/__install_check__.py" DESTINATION "${CMAKE_INSTALL_LIBDIR}" ) diff --git a/bindings/Python/README.md b/bindings/Python/README.md index fdb812fbd..196dbfc7b 100644 --- a/bindings/Python/README.md +++ b/bindings/Python/README.md @@ -17,10 +17,10 @@ - [plssvm.CSVC and plssvm.CSVR](#plssvmcsvc-and-plssvmcsvr) - [The backend C-SVCs and C-SVRs](#the-backend-c-svcs-and-c-svrs) - [plssvm.ClassificationModel and plssvm.RegressionModel](#plssvmclassificationmodel-and-plssvmregressionmodel) - - [plssvm.Version](#plssvmversion) - - [plssvm.detail.tracking.PerformanceTracker](#plssvmdetailtrackingperformancetracker) - - [plssvm.detail.tracking.Events](#plssvmdetailtrackingevent-plssvmdetailtrackingevents) + - [plssvm.performance_tracking](#plssvmperformance_tracking) + - [plssvm.performance_tracking.Events](#plssvmperformance_trackingevent-plssvmperformance_trackingevents) - [Free functions](#free-functions) + - [Module Level Attributes](#module-level-attributes) - [Exceptions](#exceptions) We currently support two kinds of Python3 bindings, one reflecting the API @@ -63,8 +63,8 @@ new `SVC`: | :x: | `break_ties : bool, default=False` | If true, decision_function_shape='ovr', and number of classes > 2, predict will break ties according to the confidence values of decision_function; otherwise the first class among the tied classes is returned. **Note**: PLSSVM behaves as if False was provided. | | :x: | `random_state : int, RandomState instance or None, default=None` | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when `probability` is False. 
| -**Note**: the `plssvm.SVC` automatically uses the optimal (in the sense of performance) backend and target platform, as -they were made available during PLSSVM's build step. +**Note**: the `plssvm.svm.SVC` automatically uses the optimal (in the sense of performance) backend and target platform, +as they were made available during PLSSVM's build step. ### Attributes @@ -215,8 +215,8 @@ new `SVR`: | :white_check_mark: | `max_iter : int, default=-1` | Hard limit on iterations within solver, or -1 for no limit. **Note**: if -1 is provided, at most `#data_points - 1` many CG iterations are performed. | | :x: | `epsilon : real_type, default=0.1` | The epsilon-tube within which no penalty is associated in the training loss function. **Note**: not applicable to PLSSVM's regression notation. | -**Note**: the `plssvm.SVR` automatically uses the optimal (in the sense of performance) backend and target platform, as -they were made available during PLSSVM's build step. +**Note**: the `plssvm.svm.SVR` automatically uses the optimal (in the sense of performance) backend and target platform, +as they were made available during PLSSVM's build step. ### Attributes @@ -349,6 +349,8 @@ If the Kokos backend is available, an additional enumeration is available: |------------------|----------------------------------------------------------------------------------------|--------------------------------------------------| | `ExecutionSpace` | `CUDA`, `HIP`, `SYCL`, `HPX`, `OPENMP`, `OPENMPTARGET`, `OPENACC`, `THREADS`, `SERIAL` | The different supported Kokkos execution spaces. | +**Note**: all our enumerations support implicit conversions from Python strings to the correct PLSSVM enumeration value. + ### Classes and submodules The following tables list all PLSSVM classes exposed on the Python side: @@ -357,11 +359,9 @@ The following tables list all PLSSVM classes exposed on the Python side: The parameter class encapsulates all necessary hyperparameters needed to fit an SVM. 
-| constructors | description | -|------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -| `Parameter()` | Default construct a parameter object. | -| `Parameter(kernel_type, degree, gamma, coef0, cost)` | Construct a parameter object by explicitly providing each hyper-parameter value. | -| `Parameter([kernel_type=KernelFunctionType.RBF, degree=3, gamma=*1/#features*, coef=0.0, cost=1.0])` | Construct a parameter object with the provided named parameters. | +| constructors | description | +|-------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| +| `Parameter(kernel_type=plssvm.KernelFunctionType.RBF, degree=3, gamma=plssvm.GammaCoefficientType.AUTO, coef0=0.0, cost=1.0)` | Construct a parameter object using the provided hyper-parameter value. | | attributes | description | |------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -386,23 +386,24 @@ file, they must be explicitly stated using the `type` parameter. 
The following constructors and methods are available for both the classification and regression data sets: -| constructors | description | -|------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ClassificationDataSet(filename, [type=*the used label type*, file_format=*depending on the extesion of the filename*, scaling=*no scaling*])` | Construct a new data set using the data provided in the given file. Default type: `std::string` for the ClassificationDataSet, `double` for the RegressionDataSet. Default file format: determines the file content based on its extension (.arff, everything else assumed to be a LIBSVM file). Default scaling: don't scale the data points. | -| `ClassificationDataSet(data, [type=*the used label type*, scaling=*no scaling*])` | Construct a new data set using the provided data directly. Default type: `std::string` for the ClassificationDataSet, `double` for the RegressionDataSet. Default scaling: don't scale the data points. | -| `ClassificationDataSet(data, labels, [scaling=*no scaling*])` | Construct a new data set using the provided data and labels directly. Default scaling: don't scale the data points. | - -| methods | description | -|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `save(filename)` | Save the current data set to the provided file. | -| `num_data_points()` | Return the number of data points in the data set. 
| -| `num_features()` | Return the number of features in the data set. | -| `data()` | Return the data points. | -| `has_labels()` | Check whether the data set is annotated with labels. | -| `labels()` | Return the labels, if present. | -| `is_scaled()` | Check whether the data points have been scaled. | -| `scaling_factors()` | Return the scaling factors, if the data set has been scaled. | -| `print(data_set)` | Overload to print a data set object displaying the label type, the number of data points and features as well as the classes and scaling interval (if applicable). | +| constructors | description | +|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ClassificationDataSet(filename, *, type=*the used label type*, format=*depending on the extesion of the filename*, scaler=*no scaling*, comm=*used MPI communicator*)` | Construct a new data set using the data provided in the given file. Default type: `std::string` for the ClassificationDataSet, `double` for the RegressionDataSet. Default file format: determines the file content based on its extension (.arff, everything else assumed to be a LIBSVM file). Default scaler: don't scale the data points. | +| `ClassificationDataSet(X, *, type=*the used label type*, scaler=*no scaling*, comm=*used MPI communicator*)` | Construct a new data set using the provided data directly. Default type: `std::string` for the ClassificationDataSet, `double` for the RegressionDataSet. Default scaler: don't scale the data points. 
| +| `ClassificationDataSet(X, y, *, scaler=*no scaling*, comm=*used MPI communicator*)` | Construct a new data set using the provided data and labels directly. Default scaler: don't scale the data points. | + +| methods | description | +|------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `save(filename, *, format=*used file format*)` | Save the current data set to the provided file. | +| `data()` | Return the data points. | +| `has_labels()` | Check whether the data set is annotated with labels. | +| `labels()` | Return the labels, if present. | +| `num_data_points()` | Return the number of data points in the data set. | +| `num_features()` | Return the number of features in the data set. | +| `is_scaled()` | Check whether the data points have been scaled. | +| `scaling_factors()` | Return the scaling factors, if the data set has been scaled. | +| `communicator()` | Return the used MPI communicator. | +| `print(data_set)` | Overload to print a data set object displaying the label type, the number of data points and features as well as the classes and scaling interval (if applicable). | The following methods are **only** available for a `plssvm.ClassificationDataSet`: @@ -415,17 +416,18 @@ The following methods are **only** available for a `plssvm.ClassificationDataSet A class encapsulating and performing the scaling of a data set to the provided `[lower, upper]` range. -| constructors | description | -|------------------------------|----------------------------------------------------------------------| -| `MinMaxScaler(lower, upper)` | Scale all data points feature-wise to the interval `[lower, upper]`. | -| `MinMaxScaler(interval)` | Scale all data points feature-wise to the provided interval. | -| `MinMaxScaler(filename)` | Read previously calculated scaling factors from the provided file. 
| +| constructors | description | +|---------------------------------------------------------------|----------------------------------------------------------------------| +| `MinMaxScaler(lower, upper, *, comm=*used MPI communicator*)` | Scale all data points feature-wise to the interval `[lower, upper]`. | +| `MinMaxScaler(interval, *, comm=*used MPI communicator*)` | Scale all data points feature-wise to the provided interval. | +| `MinMaxScaler(filename, *, comm=*used MPI communicator*)` | Read previously calculated scaling factors from the provided file. | | methods | description | |----------------------|-------------------------------------------------------------------------------------------------------------------| | `save(filename)` | Save the current scaling factors to the provided file. | | `scaling_interval()` | The scaling interval. | | `scaling_factors())` | The calculated feature-wise scaling factors. | +| `communicator()` | Return the used MPI communicator. | | `print(scaling)` | Overload to print a data set scaling object object displaying the scaling interval and number of scaling factors. | ##### `plssvm.MinMaxScalerFactors` @@ -460,10 +462,10 @@ If the most performant backend should be used, it is sufficient to use `plssvm.C The following constructors and methods are available for both classification `CSVC` and regression `CSVR`: -| constructors | description | -|---------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| -| `CSVC([backend, target_platform, plssvm.Parameter kwargs])` | Create a new C-SVM with the provided named arguments. | -| `CSVC(params, [backend, target_platform, plssvm.Parameter kwargs])` | Create a new C-SVM with the provided parameters and named arguments; the values in the `plssvm.Parameter` will be overwritten by the keyword arguments. 
| +| constructors | description | +|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------| +| `CSVC(backend, target, *, params=plssvm.Parameter, comm=*used MPI communicator*)` | Create a new C-SVM with the provided named arguments and `plssvm.Parameter` object. | +| `CSVC(pbackend, target, *, kernel_type=plssvm.KernelFunctionType.RBF, degree=3, gamma=plssvm.GammaCoefficientType.AUTO, coef0=0.0, cost=1.0, comm=*used MPI communicator*)` | Create a new C-SVM with the provided parameters and named arguments. | **Note**: if the backend type is `plssvm.BackendType.SYCL` two additional named parameters can be provided: `sycl_implementation_type` to choose between DPC++ and AdaptiveCpp as SYCL implementations @@ -473,17 +475,18 @@ and `sycl_kernel_invocation_type` to choose between the two different SYCL kerne finalization functions must be called. However, this is **automatically** handled by our Python bindings on the module import and cleanup. -| methods | description | -|----------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `set_params(params)` | Replace the current `plssvm.Parameter` with the provided one. | -| `set_params([kernel_type=KernelFunctionType.LINEAR, degree=3, gamma=*1/#features*, coef=0.0, cost=1.0])` | Replace the current `plssvm.Parameter` values with the provided named parameters. | -| `get_params()` | Return the `plssvm.Parameter` that are used in the C-SVM to learn the model. 
| -| `get_target_platform()` | Return the target platform this C-SVM is running on. | -| `num_available_devices()` | Return the number of available devices, i.e., if the target platform represents a GPU, this function returns the number of used GPUs. Returns always 1 for CPU only backends. | -| `fit(data_set, [epsilon=0.01, classification=plssvm.ClassificatioType.OAA, solver=plssvm.SolverType.AUTOMATIC, max_iter=*#datapoints - 1*])` | Learn a LS-SVM model given the provided data points and optional parameters (the termination criterion in the CG algorithm, the classification strategy, the used solver, and the maximum number of CG iterations). | -| `predict(model, data_set)` | Predict the labels of the data set using the previously learned model. | -| `score(model)` | Score the model with respect to itself returning its accuracy. | -| `score(model, data_set)` | Score the model given the provided data set returning its accuracy. | +| methods | description | +|--------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `get_params()` | Return the `plssvm.Parameter` that are used in the C-SVM to learn the model. | +| `set_params(params=plssvm.Parameter)` | Replace the current `plssvm.Parameter` with the provided one. | +| `set_params(*, kernel_type=KernelFunctionType.LINEAR, degree=3, gamma=plssvm.GammaCoefficientType.AUTO, coef=0.0, cost=1.0])` | Replace the current `plssvm.Parameter` values with the provided named parameters. | +| `get_target_platform()` | Return the target platform this C-SVM is running on. | +| `num_available_devices()` | Return the number of available devices, i.e., if the target platform represents a GPU, this function returns the number of used GPUs. 
Returns always 1 for CPU only backends. | +| `communicator()` | Return the used MPI communicator. | +| `fit(data, *, epsilon=1e-10, classification=plssvm.ClassificatioType.OAA, solver=plssvm.SolverType.AUTOMATIC, max_iter=*#datapoints - 1*)` | Learn a LS-SVM model given the provided data points and optional parameters (the termination criterion in the CG algorithm, the classification strategy, the used solver, and the maximum number of CG iterations). | +| `predict(model, data)` | Predict the labels of the data set using the previously learned model. | +| `score(model)` | Score the model with respect to itself returning its accuracy. | +| `score(model, data)` | Score the model given the provided data set returning its accuracy. | **Note**: the `classification` named parameter is not allowed for the `CSVR`! @@ -510,14 +513,13 @@ supported as target. These classes inherit all methods from the base `plssvm.CSVC` or `plssvm.CSVR` classes. The following constructors and methods are available for both classification `CSVC` and regression `CSVR`: -| constructors | description | -|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------| -| `CSVC(params)` | Create a new C-SVM with the default target platform. The hyper-parameters are explicitly set to the provided `plssvm.Parameter`. | -| `CSVC(target, params)` | Create a new C-SVM with the provided target platform. The hyper-parameters are explicitly set to the provided `plssvm.Parameter`. | -| `CSVC([plssvm.Parameter kwargs])` | Create a new C-SVM with the default target platform. The hyper-parameter values are set ot the provided named parameter values. | -| `CSVC(target, [plssvm.Parameter kwargs])` | Create a new C-SVM with the provided target platform. The hyper-parameter values are set ot the provided named parameter values. 
| +| constructors | description | +|-------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------| +| `CSVC(target, *, params=plssvm.Parameter, comm=*used MPI communicator*)` | Create a new C-SVM with the provided named arguments and `plssvm.Parameter` object. | +| `CSVC(target, *, kernel_type=plssvm.KernelFunctionType.RBF, degree=3, gamma=plssvm.GammaCoefficientType.AUTO, coef0=0.0, cost=1.0, comm=*used MPI communicator*)` | Create a new C-SVM with the provided parameters and named arguments. | -In case of the SYCL C-SVMs (`plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, and `plssvm.adaptivecpp.CSVM`; the same for the `CSVR`s), additionally, all constructors also accept the SYCL specific `sycl_kernel_invocation_type` keyword parameter. +In case of the SYCL C-SVMs (`plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, and `plssvm.adaptivecpp.CSVM`; the same for the +`CSVR`s), additionally, all constructors also accept the SYCL specific `sycl_kernel_invocation_type` keyword parameter. Also, the following method is additional available for the backend specific C-SVM: | methods | description | @@ -544,43 +546,32 @@ A class encapsulating a model learned during a call to `plssvm.CSVC.fit()` or `p The following constructors and methods are available for both the classification and regression models: -| constructors | description | -|-----------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ClassificationModel(model_file, [type=*the used label type*])` | Construct a new model object by reading a previously learned model from a file. Default type: `std::string` for the ClassificationModel, `double` for the RegressionModel. 
| - -| methods | description | -|-----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `save(filename)` | Save the current model to the provided file. | -| `num_support_vectors()` | Return the number of support vectors. **Note**: for LS-SVMs this corresponds to the number of training data points. | -| `num_features()` | Return the number of features each support vector has. | -| `get_params()` | Return the `plssvm.Parameter` that were used to learn this model. | -| `support_vectors()` | Return the support vectors learned in this model. **Note**: for LS-SVMs this corresponds to all training data points. | -| `labels()` | Return the labels of the support vectors. | -| `weights()` | Return the learned weights. | -| `rho()` | Return the learned bias values. | -| `print(model)` | Overload to print a model object displaying the number of support vectors and features, as well as the learned biases and used classification strategy (if applicable). | +| constructors | description | +|----------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ClassificationModel(filename, *, type=*the used label type*, comm=*used MPI communicator*)` | Construct a new model object by reading a previously learned model from a file. Default type: `std::string` for the ClassificationModel, `double` for the RegressionModel. | + +| methods | description | +|-------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `save(filename)` | Save the current model to the provided file. 
| +| `num_support_vectors()` | Return the number of support vectors. **Note**: for LS-SVMs this corresponds to the number of training data points. | +| `num_features()` | Return the number of features each support vector has. | +| `get_params()` | Return the `plssvm.Parameter` that were used to learn this model. | +| `support_vectors()` | Return the support vectors learned in this model. **Note**: for LS-SVMs this corresponds to all training data points. | +| `labels()` | Return the labels of the support vectors. | +| `weights()` | Return the learned weights. | +| `rho()` | Return the learned bias values. | +| `communicator()` | Return the used MPI communicator. | +| `print(model)` | Overload to print a model object displaying the number of support vectors and features, as well as the learned biases and used classification strategy (if applicable). | The following methods are **only** available for a `plssvm.ClassificationModel`: | methods | description | |-----------------------------|------------------------------------------| -| `classes()` | Return the different classes. | | `num_classes()` | Return the number of different classes. | +| `classes()` | Return the different classes. | | `get_classification_type()` | Return the used classification strategy. | -#### `plssvm.Version` - -A class encapsulating the version information of the used PLSSVM installation. - -| attributes | description | -|--------------------|-------------------------------------------| -| `name : string` | The full name of the PLSSVM library. | -| `version : string` | The PLSSVM version ("major.minor.patch"). | -| `major : int` | The major PLSSVM version. | -| `minor : int` | The minor PLSSVM version. | -| `patch : int` | The patch PLSSVM version. | - -#### `plssvm.detail.tracking.PerformanceTracker` +#### `plssvm.performance_tracking` A submodule used to track various performance statistics like runtimes, but also the used setup and hyperparameters. 
The tracked metrics can be saved to a YAML file for later post-processing. @@ -601,12 +592,12 @@ The tracked metrics can be saved to a YAML file for later post-processing. | `get_events()` | Return all previously recorded events. | | `clear_tracking_entries()` | Remove all currently tracked entries from the performance tracker. | -#### `plssvm.detail.tracking.Event`, `plssvm.detail.tracking.Events` +#### `plssvm.performance_tracking.Event`, `plssvm.performance_tracking.Events` Two rather similar classes. **Note**: both classes are only available if PLSSVM was built with `-DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON`! -The `plssvm.detail.tracking.Event` class is a simple POD encapsulating the time point when +The `plssvm.performance_tracking.Event` class is a simple POD encapsulating the time point when an event occurred and the respective event name. | constructors | description | @@ -618,7 +609,7 @@ an event occurred and the respective event name. | `time_point : time` | The time point when this event occurred. | | `name : string` | The name of this event. | -The `plssvm.detail.tracking.Events` class stores multiple `plssvm.detail.tracking.Event`s. +The `plssvm.performance_tracking.Events` class stores multiple `plssvm.performance_tracking.Event`s. | constructors | description | |--------------|---------------------------------------------| @@ -644,12 +635,12 @@ The following table lists all free functions in PLSSVM directly callable via `pl | `determine_default_target_platform(platform_device_list)` | Determines the default target platform used given the available target platforms. | | `kernel_function_type_to_math_string(kernel)` | Returns a math string of the provided kernel function. 
| | `linear_kernel_function(x, y)` | Calculate the linear kernel function of two vectors: x'*y | -| `polynomial_kernel_function(x, y, degree, gamma, coef0)` | Calculate the polynomial kernel function of two vectors: (gamma*x'*y+coef0)^degree, with degree ∊ ℤ, gamma > 0 | -| `rbf_kernel_function(x, y, gamma)` | Calculate the radial basis function kernel function of two vectors: exp(-gamma*\|x-y\|^2), with gamma > 0 | -| `sigmoid_kernel_function(x, y, gamma, coef0)` | Calculate the sigmoid kernel function of two vectors: tanh(gamma*x'*y), with gamma > 0 | -| `laplacian_kernel_function(x, y, gamma)` | Calculate the laplacian kernel function of two vectors: exp(-gamma*\|x-y\|_1), with gamma > 0 | -| `chi_squared_kernel_function(x, y, gamma)` | Calculate the chi-squared kernel function of two vectors: exp(-gamma*sum_i((x[i] - y[i])^2) / (x[i] + y[i])), with gamma > 0 | -| `kernel_function(x, y, params)` | Calculate the kernel function provided in params with the additional parameters also provided in params. | +| `polynomial_kernel_function(x, y, *, degree, gamma, coef0)` | Calculate the polynomial kernel function of two vectors: (gamma*x'*y+coef0)^degree, with degree ∊ ℤ, gamma > 0 | +| `rbf_kernel_function(x, y, *, gamma)` | Calculate the radial basis function kernel function of two vectors: exp(-gamma*\|x-y\|^2), with gamma > 0 | +| `sigmoid_kernel_function(x, y, *, gamma, coef0)` | Calculate the sigmoid kernel function of two vectors: tanh(gamma*x'*y+coef0), with gamma > 0 | +| `laplacian_kernel_function(x, y, *, gamma)` | Calculate the laplacian kernel function of two vectors: exp(-gamma*\|x-y\|_1), with gamma > 0 | +| `chi_squared_kernel_function(x, y, *, gamma)` | Calculate the chi-squared kernel function of two vectors: exp(-gamma*sum_i((x[i] - y[i])^2 / (x[i] + y[i]))), with gamma > 0 | +| `kernel_function(x, y, *, params)` | Calculate the kernel function provided in params with the additional parameters also provided in params. 
| | `classification_type_to_full_string(classification)` | Returns the full string of the provided classification type, i.e., "one vs. all" and "one vs. one" instead of only "oaa" or "oao". | | `calculate_number_of_classifiers(classification, num_classes)` | Return the number of necessary classifiers in a multi-class setting with the provided classification strategy and number of different classes. | | `list_available_backends()` | List all available backends (determined during PLSSVM's build step). | @@ -663,7 +654,7 @@ The following table lists all free functions in PLSSVM directly callable via `pl | `list_available_svm_types()` | List all available SVM types (C-SVC or C-SVR). | | `svm_type_to_task_name(svm_type)` | Returns the task name (classification or regression) associated with the provided SVM type. | | `svm_type_from_model_file(model_file)` | Returns the SVM type used to create the provided model file. | -| `regression_report(y_true, y_pred, [force_finite, output_dict])` | Returns a regression report similar to sklearn's [`metrics.classification_report`](https://scikit-learn.org/0.15/modules/generated/sklearn.metrics.classification_report.html) for the regression task. If `output_dict` is , returns a Python dictionary, otherwise directly returns a string. | +| `regression_report(y_true, y_pred, *, force_finite, output_dict)` | Returns a regression report similar to sklearn's [`metrics.classification_report`](https://scikit-learn.org/0.15/modules/generated/sklearn.metrics.classification_report.html) for the regression task. If `output_dict` is `True`, returns a Python dictionary, otherwise directly returns a string. 
| If a SYCL implementation is available, additional free functions are available: @@ -677,6 +668,16 @@ If a stdpar implementation is available, additional free functions are available |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| | `list_available_stdpar_implementations()` | List all available stdpar implementations (determined during PLSSVM's build step; currently always guaranteed to be only one implementation). | +### Module Level Attributes + +A few module level attributes are supported and directly retrievable in the top-level `plssvm` module: + +| attribute | description | +|--------------------|-------------------------------------------------------------------------------------------| +| `__name__` | The name of our PLSSVM library: "PLSSVM - Parallel Least Squares Support Vector Machine". | +| `__version__` | The current PLSSVM version as version string. | +| `__version_info__` | The current PLSSVM major, minor, and patch versions as tuple. | + ### Exceptions The PLSSVM Python3 bindings define a few new exception types: @@ -694,7 +695,7 @@ The PLSSVM Python3 bindings define a few new exception types: | `UnsupportedKernelTypeError` | If an unsupported target platform has been requested. | | `GPUDevicePtrError` | If something went wrong in one of the backend's GPU device pointers. **Note**: shouldn't occur in user code. | | `MatrixError` | If something went wrong in the internal matrix class. **Note**: shouldn't occur in user code. | -| `KernelLaunchResourcesError` | If something went wrong during a kernel launch due to insufficient ressources. | +| `KernelLaunchResourcesError` | If something went wrong during a kernel launch due to insufficient resources. | | `ClassificationReportError` | If something in the classification report went wrong. **Note**: shouldn't occur in user code. 
| | `RegressionReportError` | If something in the regression report went wrong. **Note**: shouldn't occur in user code. | | `EnvironmentError` | If something during the special environment initialization or finalization went wrong. | diff --git a/bindings/Python/__init__.py b/bindings/Python/__init__.py index 5976a4f2a..fa9c3e369 100644 --- a/bindings/Python/__init__.py +++ b/bindings/Python/__init__.py @@ -6,3 +6,15 @@ # explicitly set the module level attributes __doc__ = plssvm.__doc__ __version__ = plssvm.__version__ +__version_info__ = plssvm.__version_info__ +__has_mpi_support__ = plssvm.__has_mpi_support__ + +# explicitly register the submodules as importable Python module +import sys + +possible_submodules = \ + ["svm", "performance_tracking", + "adaptivecpp", "cuda", "dpcpp", "hip", "hpx", "kokkos", "opencl", "openmp", "stdpar", "sycl"] +for submodule in possible_submodules: + if hasattr(plssvm, submodule): + sys.modules[f"plssvm.{submodule}"] = getattr(plssvm, submodule) diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp index 7997991b9..7696ddaa8 100644 --- a/bindings/Python/backend_types.cpp +++ b/bindings/Python/backend_types.cpp @@ -9,6 +9,8 @@ #include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::list_available_backends, plssvm::determine_default_backend +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + #include "pybind11/pybind11.h" // py::module_, py::enum_ #include "pybind11/stl.h" // support for STL types: std::vector @@ -18,7 +20,8 @@ namespace py = pybind11; void init_backend_types(py::module_ &m) { // bind enum class - py::enum_(m, "BackendType", "Enum class for all possible backend types, all different SYCL implementations have the same backend type \"sycl\".") + py::enum_ py_enum(m, "BackendType", "Enum class for all possible backend types, all different SYCL implementations have the same backend type \"sycl\"."); + py_enum 
.value("AUTOMATIC", plssvm::backend_type::automatic, "the default backend; depends on the specified target platform") .value("OPENMP", plssvm::backend_type::openmp, "OpenMP to target CPUs only (currently no OpenMP target offloading support)") .value("HPX", plssvm::backend_type::hpx, "HPX to target CPUs only (currently no GPU executor support)") @@ -29,6 +32,9 @@ void init_backend_types(py::module_ &m) { .value("SYCL", plssvm::backend_type::sycl, "SYCL to target CPUs and GPUs from different vendors; currently tested SYCL implementations are DPC++ and AdaptiveCpp") .value("KOKKOS", plssvm::backend_type::kokkos, "Kokkos to target CPUs and GPUs from different vendors; currently all Kokkos execution spaces except Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC are supported"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // bind free functions m.def("list_available_backends", &plssvm::list_available_backends, "list the available backends (as found during CMake configuration)"); m.def("determine_default_backend", &plssvm::determine_default_backend, "determine the default backend given the list of available backends and target platforms", py::arg("available_backends") = plssvm::list_available_backends(), py::arg("available_target_platforms") = plssvm::list_available_target_platforms()); diff --git a/bindings/Python/backends/adaptivecpp_csvm.cpp b/bindings/Python/backends/adaptivecpp_csvm.cpp index a02dd0b65..bf43d85f1 100644 --- a/bindings/Python/backends/adaptivecpp_csvm.cpp +++ b/bindings/Python/backends/adaptivecpp_csvm.cpp @@ -10,21 +10,27 @@ #include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm #include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::adaptivecpp::backend_exception #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type +#include "plssvm/constants.hpp" // 
plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -34,54 +40,38 @@ template void bind_adaptivecpp_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::adaptivecpp::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the AdaptiveCpp SYCL backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided parameters and optional SYCL specific keyword arguments", csvm_name) }; - const std::string 
target_param_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided target platform, parameters, and optional SYCL specific keyword arguments", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided target platform and keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided SVM parameter encapsulated in a plssvm.Parameter and optional SYCL specific keyword arguments", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided SVM parameter as separate keyword arguments including optional SYCL specific keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init([](const plssvm::parameter params, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the default target platform - return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); - }), - param_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the default target platform - return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); - }), - target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the default target platform - return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the provided target platform - return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("comm") = plssvm::mpi::communicator{}) .def("get_kernel_invocation_type", &plssvm::adaptivecpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL C-SVM") .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices(), self.get_kernel_invocation_type()); @@ -93,10 +83,9 @@ void bind_adaptivecpp_csvms(py::module_ &m, const std::string &csvm_name) { py::module_ 
init_adaptivecpp_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the AdaptiveCpp C-SVM bindings py::module_ adaptivecpp_module = m.def_submodule("adaptivecpp", "a module containing all AdaptiveCpp backend specific functionality"); - const py::module_ adaptivecpp_pure_virtual_module = adaptivecpp_module.def_submodule("__pure_virtual", "a module containing all pure-virtual AdaptiveCpp backend specific functionality"); // bind the pure-virtual base AdaptiveCpp C-SVM - [[maybe_unused]] const py::class_ virtual_base_adaptivecpp_csvm(adaptivecpp_pure_virtual_module, "__pure_virtual_adaptivecpp_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_adaptivecpp_csvm(m, "__pure_virtual_adaptivecpp_CSVM", py::module_local()); // bind the specific AdaptiveCpp C-SVC and C-SVR classes bind_adaptivecpp_csvms(adaptivecpp_module, "CSVC"); diff --git a/bindings/Python/backends/cuda_csvm.cpp b/bindings/Python/backends/cuda_csvm.cpp index cb7867d20..f5984065d 100644 --- a/bindings/Python/backends/cuda_csvm.cpp +++ b/bindings/Python/backends/cuda_csvm.cpp @@ -9,21 +9,27 @@ #include "plssvm/backend_types.hpp" // plssvm::cuda::backend_csvm_type_t #include "plssvm/backends/CUDA/csvm.hpp" // plssvm::cuda::csvm #include "plssvm/backends/CUDA/exceptions.hpp" // plssvm::cuda::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // 
plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -33,34 +39,36 @@ template void bind_cuda_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::cuda::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the CUDA backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create a CUDA {} with the provided parameters", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create a CUDA {} with the provided target platform and parameters", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create a CUDA {} with the provided keyword arguments", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create a CUDA {} with the provided target platform and keyword arguments", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create a CUDA {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create a CUDA {} with the provided SVM parameter as separate keyword 
arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init(), param_docstring.c_str()) - .def(py::init(), target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the default target platform - return std::make_unique(params); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the provided target platform - return std::make_unique(target, params); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params); }), - target_kwargs_docstring.c_str()) 
+ keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices()); }); @@ -71,10 +79,9 @@ void bind_cuda_csvms(py::module_ &m, const std::string &csvm_name) { void init_cuda_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the CUDA C-SVM bindings py::module_ cuda_module = m.def_submodule("cuda", "a module containing all CUDA backend specific functionality"); - const py::module_ cuda_pure_virtual_module = cuda_module.def_submodule("__pure_virtual", "a module containing all pure-virtual CUDA backend specific functionality"); // bind the pure-virtual base CUDA C-SVM - [[maybe_unused]] const py::class_ virtual_base_cuda_csvm(cuda_pure_virtual_module, "__pure_virtual_cuda_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_cuda_csvm(m, "__pure_virtual_cuda_CSVM", py::module_local()); // bind the specific CUDA C-SVC and C-SVR classes bind_cuda_csvms(cuda_module, "CSVC"); diff --git a/bindings/Python/backends/dpcpp_csvm.cpp b/bindings/Python/backends/dpcpp_csvm.cpp index b895bdeef..51dcd7e16 100644 --- a/bindings/Python/backends/dpcpp_csvm.cpp +++ b/bindings/Python/backends/dpcpp_csvm.cpp @@ -10,21 +10,27 @@ #include "plssvm/backends/SYCL/DPCPP/csvm.hpp" // plssvm::dpcpp::csvm #include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::dpcpp::backend_exception #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception 
+#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -34,54 +40,38 @@ template void bind_dpcpp_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::dpcpp::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the DPC++ SYCL backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create a DPC++ SYCL {} with the provided parameters and optional SYCL specific keyword arguments", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create a DPC++ SYCL {} with the provided target platform, parameters, and optional 
SYCL specific keyword arguments", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create a DPC++ SYCL {} with the provided keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create a DPC++ SYCL {} with the provided target platform and keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create a DPC++ SYCL {} with the provided SVM parameter encapsulated in a plssvm.Parameter and optional SYCL specific keyword arguments", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create a DPC++ SYCL {} with the provided SVM parameter as separate keyword arguments including optional SYCL specific keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init([](const plssvm::parameter params, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the default target platform - return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); - }), - param_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the default target platform - return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); - }), - target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the default target platform - return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create C-SVM with the provided target platform - return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("comm") = plssvm::mpi::communicator{}) .def("get_kernel_invocation_type", &plssvm::dpcpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL C-SVM") .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices(), self.get_kernel_invocation_type()); @@ -93,10 +83,9 @@ void bind_dpcpp_csvms(py::module_ &m, const std::string &csvm_name) { py::module_ init_dpcpp_csvm(py::module_ &m, const 
py::exception &base_exception) { // use its own submodule for the DPC++ C-SVM bindings py::module_ dpcpp_module = m.def_submodule("dpcpp", "a module containing all DPC++ backend specific functionality"); - const py::module_ dpcpp_pure_virtual_module = dpcpp_module.def_submodule("__pure_virtual", "a module containing all pure-virtual DPC++ backend specific functionality"); // bind the pure-virtual base DPC++ C-SVM - [[maybe_unused]] const py::class_ virtual_base_dpcpp_csvm(dpcpp_pure_virtual_module, "__pure_virtual_dpcpp_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_dpcpp_csvm(m, "__pure_virtual_dpcpp_CSVM", py::module_local()); // bind the specific DPC++ C-SVC and C-SVR classes bind_dpcpp_csvms(dpcpp_module, "CSVC"); diff --git a/bindings/Python/backends/hip_csvm.cpp b/bindings/Python/backends/hip_csvm.cpp index fc05c13d4..b1f6ca7f1 100644 --- a/bindings/Python/backends/hip_csvm.cpp +++ b/bindings/Python/backends/hip_csvm.cpp @@ -9,21 +9,27 @@ #include "plssvm/backend_types.hpp" // plssvm::hip::backend_csvm_type_t #include "plssvm/backends/HIP/csvm.hpp" // plssvm::hip::csvm #include "plssvm/backends/HIP/exceptions.hpp" // plssvm::hip::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 
type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -33,34 +39,36 @@ template void bind_hip_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::hip::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the HIP backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create a HIP {} with the provided parameters", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create a HIP {} with the provided target platform and parameters", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create a HIP {} with the provided keyword arguments", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create a HIP {} with the provided target platform and keyword arguments", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create a HIP {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create a HIP {} with the provided SVM parameter as separate keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init(), param_docstring.c_str()) - .def(py::init(), target_param_docstring.c_str()) - 
.def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the default target platform - return std::make_unique(params); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the provided target platform - return std::make_unique(target, params); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + 
py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices()); }); @@ -71,10 +79,9 @@ void bind_hip_csvms(py::module_ &m, const std::string &csvm_name) { void init_hip_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the HIP C-SVM bindings py::module_ hip_module = m.def_submodule("hip", "a module containing all HIP backend specific functionality"); - const py::module_ hip_pure_virtual_module = hip_module.def_submodule("__pure_virtual", "a module containing all pure-virtual HIP backend specific functionality"); // bind the pure-virtual base HIP C-SVM - [[maybe_unused]] const py::class_ virtual_base_hip_csvm(hip_pure_virtual_module, "__pure_virtual_hip_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_hip_csvm(m, "__pure_virtual_hip_CSVM", py::module_local()); // bind the specific HIP C-SVC and C-SVR classes bind_hip_csvms(hip_module, "CSVC"); diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp index 48710e4c5..91d56d58c 100644 --- a/bindings/Python/backends/hpx_csvm.cpp +++ b/bindings/Python/backends/hpx_csvm.cpp @@ -10,21 +10,27 @@ #include "plssvm/backend_types.hpp" // plssvm::hpx::backend_csvm_type_t #include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm #include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter 
#include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -34,34 +40,36 @@ template void bind_hpx_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::hpx::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the HPX backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create an HPX {} with the provided parameters", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create an HPX {} with the provided target platform and parameters", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create an HPX {} with the provided keyword arguments", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create an HPX {} with the provided target platform and keyword arguments", csvm_name) }; + const std::string 
params_constructor_docstring{ fmt::format("create a HPX {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create a HPX {} with the provided SVM parameter as separate keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init(), param_docstring.c_str()) - .def(py::init(), target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the default target platform - return std::make_unique(params); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the provided target platform - return std::make_unique(target, params); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type 
gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices()); }); @@ -72,10 +80,9 @@ void bind_hpx_csvms(py::module_ &m, const std::string &csvm_name) { void init_hpx_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the HPX C-SVM bindings py::module_ hpx_module = m.def_submodule("hpx", "a module containing all HPX backend specific functionality"); - const py::module_ hpx_pure_virtual_module = hpx_module.def_submodule("__pure_virtual", "a module containing all pure-virtual HPX backend specific functionality"); // bind the pure-virtual base HPX C-SVM - [[maybe_unused]] const py::class_ virtual_base_hpx_csvm(hpx_pure_virtual_module, "__pure_virtual_hpx_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_hpx_csvm(m, "__pure_virtual_hpx_CSVM", py::module_local()); // bind the specific HPX C-SVC and C-SVR classes bind_hpx_csvms(hpx_module, "CSVC"); diff --git a/bindings/Python/backends/kokkos_csvm.cpp b/bindings/Python/backends/kokkos_csvm.cpp index 884e8a68f..d55dddf2a 100644 --- a/bindings/Python/backends/kokkos_csvm.cpp +++ b/bindings/Python/backends/kokkos_csvm.cpp @@ -10,21 +10,27 @@ #include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm #include 
"plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, register_implicit_str_enum_conversion} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -34,54 +40,38 @@ template void bind_kokkos_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::kokkos::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the Kokkos 
backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create a Kokkos {} with the provided parameters and optional Kokkos specific keyword arguments", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create a Kokkos {} with the provided target platform, parameters, and optional Kokkos specific keyword arguments", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create a Kokkos {} with the provided keyword arguments (including optional Kokkos specific keyword arguments)", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create a Kokkos {} with the provided target platform and keyword arguments (including optional Kokkos specific keyword arguments)", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create a Kokkos {} with the provided SVM parameter encapsulated in a plssvm.Parameter and optional Kokkos specific keyword arguments", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create a Kokkos {} with the provided SVM parameter as separate keyword arguments including optional Kokkos specific keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init([](const plssvm::parameter params, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kokkos_execution_space" }); - // set Kokkos execution space - const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; - // create C-SVM with the default target platform - return std::make_unique(params, plssvm::kokkos_execution_space = space); - }), - param_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kokkos_execution_space" }); - // set Kokkos execution space - const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; - // create C-SVM with the default target platform - return std::make_unique(target, params, plssvm::kokkos_execution_space = space); - }), - target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // set Kokkos execution space - const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; - // create C-SVM with the default target platform - return std::make_unique(params, plssvm::kokkos_execution_space = space); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::kokkos::execution_space space, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params, plssvm::kokkos_execution_space = space); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // set Kokkos execution space - const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; - // create C-SVM with the provided target platform - return std::make_unique(target, params, plssvm::kokkos_execution_space = space); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("kokkos_execution_space") = plssvm::kokkos::execution_space::automatic, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const plssvm::kokkos::execution_space space, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params, plssvm::kokkos_execution_space = space); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("kokkos_execution_space") = plssvm::kokkos::execution_space::automatic, + py::arg("comm") = plssvm::mpi::communicator{}) .def("get_execution_space", &plssvm::kokkos::csvm::get_execution_space, "get the Kokkos execution space used in this Kokkos C-SVM") .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices(), self.get_execution_space()); @@ -93,10 +83,26 @@ void bind_kokkos_csvms(py::module_ &m, const std::string &csvm_name) { void init_kokkos_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the Kokkos C-SVM 
bindings py::module_ kokkos_module = m.def_submodule("kokkos", "a module containing all Kokkos backend specific functionality"); - const py::module_ kokkos_pure_virtual_module = kokkos_module.def_submodule("__pure_virtual", "a module containing all pure-virtual Kokkos backend specific functionality"); + + // bind the enum class + py::enum_ py_enum(kokkos_module, "ExecutionSpace", "Enum class for all supported Kokkos execution spaces in PLSSVM."); + py_enum + .value("AUTOMATIC", plssvm::kokkos::execution_space::automatic, "automatically determine the used Kokkos execution space; note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace!") + .value("CUDA", plssvm::kokkos::execution_space::cuda, "execution space representing execution on a CUDA device") + .value("HIP", plssvm::kokkos::execution_space::hip, "execution space representing execution on a device supported by HIP") + .value("SYCL", plssvm::kokkos::execution_space::sycl, "execution space representing execution on a device supported by SYCL") + .value("HPX", plssvm::kokkos::execution_space::hpx, "execution space representing execution with the HPX runtime system") + .value("OPENMP", plssvm::kokkos::execution_space::openmp, "execution space representing execution with the OpenMP runtime system") + .value("OPENMPTARGET", plssvm::kokkos::execution_space::openmp_target, "execution space representing execution using the target offloading feature of the OpenMP runtime system") + .value("OPENACC", plssvm::kokkos::execution_space::openacc, "execution space representing execution with the OpenACC runtime system") + .value("THREADS", plssvm::kokkos::execution_space::threads, "execution space representing parallel execution with std::threads") + .value("SERIAL", plssvm::kokkos::execution_space::serial, "execution space representing serial execution on the CPU. 
Should always be available"); + + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); // bind the pure-virtual base Kokkos C-SVM - [[maybe_unused]] const py::class_ virtual_base_kokkos_csvm(kokkos_pure_virtual_module, "__pure_virtual_kokkos_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_kokkos_csvm(m, "__pure_virtual_kokkos_CSVM", py::module_local()); // bind the specific Kokkos C-SVC and C-SVR classes bind_kokkos_csvms(kokkos_module, "CSVC"); diff --git a/bindings/Python/backends/opencl_csvm.cpp b/bindings/Python/backends/opencl_csvm.cpp index 77ae0f2d8..9c3a2f3d0 100644 --- a/bindings/Python/backends/opencl_csvm.cpp +++ b/bindings/Python/backends/opencl_csvm.cpp @@ -9,21 +9,27 @@ #include "plssvm/backend_types.hpp" // plssvm::opencl::backend_csvm_type_t #include "plssvm/backends/OpenCL/csvm.hpp" // plssvm::opencl::csvm #include "plssvm/backends/OpenCL/exceptions.hpp" // plssvm::opencl::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // 
fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -33,34 +39,36 @@ template void bind_opencl_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::opencl::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the OpenCL backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create an OpenCL {} with provided parameters", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create an OpenCL {} with the provided target platform and parameters", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create an OpenCL {} with the provided keyword arguments", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create an OpenCL {} with the provided target platform and keyword arguments", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create an OpenCL {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create an OpenCL {} with the provided SVM parameter as separate keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init(), param_docstring.c_str()) - .def(py::init(), target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { 
"kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the default target platform - return std::make_unique(params); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the provided target platform - return std::make_unique(target, params); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + 
py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices()); }); @@ -71,10 +79,9 @@ void bind_opencl_csvms(py::module_ &m, const std::string &csvm_name) { void init_opencl_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the OpenCL C-SVM bindings py::module_ opencl_module = m.def_submodule("opencl", "a module containing all OpenCL backend specific functionality"); - const py::module_ opencl_pure_virtual_module = opencl_module.def_submodule("__pure_virtual", "a module containing all pure-virtual OpenCL backend specific functionality"); // bind the pure-virtual base OpenCL C-SVM - [[maybe_unused]] const py::class_ virtual_base_opencl_csvm(opencl_pure_virtual_module, "__pure_virtual_opencl_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_opencl_csvm(m, "__pure_virtual_opencl_CSVM", py::module_local()); // bind the specific OpenCL C-SVC and C-SVR classes bind_opencl_csvms(opencl_module, "CSVC"); diff --git a/bindings/Python/backends/openmp_csvm.cpp b/bindings/Python/backends/openmp_csvm.cpp index 376329474..095659542 100644 --- a/bindings/Python/backends/openmp_csvm.cpp +++ b/bindings/Python/backends/openmp_csvm.cpp @@ -9,21 +9,27 @@ #include "plssvm/backend_types.hpp" // plssvm::openmp::backend_csvm_type_t #include "plssvm/backends/OpenMP/csvm.hpp" // plssvm::openmp::csvm #include "plssvm/backends/OpenMP/exceptions.hpp" // plssvm::openmp::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc 
#include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -33,34 +39,36 @@ template void bind_openmp_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::openmp::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} using the OpenMP backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create an OpenMP {} with the provided parameters", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create an OpenMP {} with the provided target platform and parameters", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create an OpenMP {} with the provided keyword arguments", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create an OpenMP {} with the provided target platform and keyword arguments", csvm_name) }; + const std::string params_constructor_docstring{ 
fmt::format("create an OpenMP {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create an OpenMP {} with the provided SVM parameter as separate keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init(), param_docstring.c_str()) - .def(py::init(), target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the default target platform - return std::make_unique(params); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the provided target platform - return std::make_unique(target, params); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const 
plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices()); }); @@ -71,10 +79,9 @@ void bind_openmp_csvms(py::module_ &m, const std::string &csvm_name) { void init_openmp_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the OpenMP C-SVM bindings py::module_ openmp_module = m.def_submodule("openmp", "a module containing all OpenMP backend specific functionality"); - const py::module_ openmp_pure_virtual_module = openmp_module.def_submodule("__pure_virtual", "a module containing all pure-virtual OpenMP backend specific functionality"); // bind the pure-virtual base OpenMP C-SVM - [[maybe_unused]] const py::class_ virtual_base_openmp_csvm(openmp_pure_virtual_module, "__pure_virtual_openmp_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_openmp_csvm(m, "__pure_virtual_openmp_CSVM", py::module_local()); // bind the specific OpenMP C-SVC and C-SVR classes bind_openmp_csvms(openmp_module, "CSVC"); diff --git a/bindings/Python/backends/stdpar_csvm.cpp b/bindings/Python/backends/stdpar_csvm.cpp index f1dadad50..df32403e3 100644 --- a/bindings/Python/backends/stdpar_csvm.cpp +++ b/bindings/Python/backends/stdpar_csvm.cpp @@ -10,21 +10,27 @@ #include "plssvm/backends/stdpar/csvm.hpp" // plssvm::stdpar::csvm 
#include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type +#include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, register_implicit_str_enum_conversion} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception -#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant -#include // std::make_unique -#include // std::string +#include // std::make_unique +#include // std::string +#include // std::move namespace py = pybind11; @@ -34,34 +40,36 @@ template void bind_stdpar_csvms(py::module_ &m, const std::string &csvm_name) { using backend_csvm_type = plssvm::stdpar::backend_csvm_type_t; + // the default parameters used + const plssvm::parameter default_params{}; + // assemble docstrings const std::string class_docstring{ fmt::format("A {} 
using the stdpar backend.", csvm_name) }; - const std::string param_docstring{ fmt::format("create an stdpar {} with the provided parameters", csvm_name) }; - const std::string target_param_docstring{ fmt::format("create an stdpar {} with the provided target platform and parameters", csvm_name) }; - const std::string kwargs_docstring{ fmt::format("create an stdpar {} with the provided keyword arguments", csvm_name) }; - const std::string target_kwargs_docstring{ fmt::format("create an stdpar {} with the provided target platform and keyword arguments", csvm_name) }; + const std::string params_constructor_docstring{ fmt::format("create an stdpar {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; + const std::string keyword_args_constructor_docstring{ fmt::format("create an stdpar {} with the provided SVM parameter as separate keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str()) - .def(py::init(), param_docstring.c_str()) - .def(py::init(), target_param_docstring.c_str()) - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the default target platform - return std::make_unique(params); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params); }), - kwargs_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is 
provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); - // create C-SVM with the provided target platform - return std::make_unique(target, params); + params_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return std::make_unique(std::move(comm), target, params); }), - target_kwargs_docstring.c_str()) + keyword_args_constructor_docstring.c_str(), + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) .def("get_implementation_type", &plssvm::stdpar::csvm::get_implementation_type, "get the stdpar implementation used in this stdpar C-SVM") .def("__repr__", [csvm_name](const backend_csvm_type &self) { return fmt::format("", csvm_name, self.num_available_devices(), self.get_implementation_type()); @@ -73,20 +81,23 @@ void bind_stdpar_csvms(py::module_ &m, const std::string &csvm_name) { void init_stdpar_csvm(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the stdpar C-SVM bindings py::module_ stdpar_module = m.def_submodule("stdpar", "a module containing all stdpar backend specific functionality"); - const py::module_ stdpar_pure_virtual_module = stdpar_module.def_submodule("__pure_virtual", "a 
module containing all pure-virtual stdpar backend specific functionality"); // bind the enum class - py::enum_(stdpar_module, "ImplementationType", "Enum class for all supported stdpar implementations in PLSSVM.") + py::enum_ py_enum(stdpar_module, "ImplementationType", "Enum class for all supported stdpar implementations in PLSSVM."); + py_enum .value("NVHPC", plssvm::stdpar::implementation_type::nvhpc, "use NVIDIA's HPC SDK (NVHPC) compiler nvc++") .value("ROC_STDPAR", plssvm::stdpar::implementation_type::roc_stdpar, "use AMD's roc-stdpar compiler (patched LLVM)") .value("INTEL_LLVM", plssvm::stdpar::implementation_type::intel_llvm, "use Intel's LLVM compiler icpx") .value("ADAPTIVECPP", plssvm::stdpar::implementation_type::adaptivecpp, "use AdaptiveCpp (formerly known as hipSYCL)") .value("GNU_TBB", plssvm::stdpar::implementation_type::gnu_tbb, "use GNU GCC + Intel's TBB library"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + stdpar_module.def("list_available_stdpar_implementations", &plssvm::stdpar::list_available_stdpar_implementations, "list all available stdpar implementations"); // bind the pure-virtual base stdpar C-SVM - [[maybe_unused]] const py::class_ virtual_base_stdpar_csvm(stdpar_pure_virtual_module, "__pure_virtual_stdpar_base_CSVM"); + [[maybe_unused]] const py::class_ virtual_base_stdpar_csvm(m, "__pure_virtual_stdpar_CSVM", py::module_local()); // bind the specific stdpar C-SVC and C-SVR classes bind_stdpar_csvms(stdpar_module, "CSVC"); diff --git a/bindings/Python/backends/sycl.cpp b/bindings/Python/backends/sycl.cpp index 0421fc308..f2cc924d6 100644 --- a/bindings/Python/backends/sycl.cpp +++ b/bindings/Python/backends/sycl.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "bindings/Python/utility.hpp" // 
plssvm::bindings::python::util::register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, register_implicit_str_enum_conversion} #include "pybind11/pybind11.h" // py::module_, py::enum_, py::exception #include "pybind11/stl.h" // support for STL types: std:vector @@ -34,17 +34,25 @@ void init_sycl(py::module_ &m, const py::exception &base_exce plssvm::bindings::python::util::register_py_exception(sycl_module, "BackendError", base_exception); // bind the two enum classes - py::enum_(sycl_module, "ImplementationType", "Enum class for all supported SYCL implementation in PLSSVM.") + py::enum_ py_enum_impl(sycl_module, "ImplementationType", "Enum class for all supported SYCL implementation in PLSSVM."); + py_enum_impl .value("AUTOMATIC", plssvm::sycl::implementation_type::automatic, "use the available SYCL implementation; if more than one implementation is available, the macro PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION must be defined during the CMake configuration") .value("DPCPP", plssvm::sycl::implementation_type::dpcpp, "use DPC++ as SYCL implementation") .value("ADAPTIVECPP", plssvm::sycl::implementation_type::adaptivecpp, "use AdaptiveCpp (formerly known as hipSYCL) as SYCL implementation"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum_impl); + sycl_module.def("list_available_sycl_implementations", &plssvm::sycl::list_available_sycl_implementations, "list all available SYCL implementations"); - py::enum_(sycl_module, "KernelInvocationType", "Enum class for all possible SYCL kernel invocation types supported in PLSSVM.") + py::enum_ py_enum_invocation(sycl_module, "KernelInvocationType", "Enum class for all possible SYCL kernel invocation types supported in PLSSVM."); + py_enum_invocation .value("AUTOMATIC", plssvm::sycl::kernel_invocation_type::automatic, "use the best kernel invocation type for the current SYCL 
implementation and target hardware platform") .value("ND_RANGE", plssvm::sycl::kernel_invocation_type::nd_range, "use the nd_range kernel invocation type"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum_invocation); + // initialize SYCL binding classes #if defined(PLSSVM_SYCL_BACKEND_HAS_ADAPTIVECPP) const py::module_ adaptivecpp_module = init_adaptivecpp_csvm(m, base_exception); diff --git a/bindings/Python/classification_types.cpp b/bindings/Python/classification_types.cpp index c429973c7..7d145ff84 100644 --- a/bindings/Python/classification_types.cpp +++ b/bindings/Python/classification_types.cpp @@ -8,7 +8,9 @@ #include "plssvm/classification_types.hpp" // plssvm::classification_type, plssvm::classification_type_to_full_string, plssvm::calculate_number_of_classifiers -#include "pybind11/pybind11.h" // py::module_, py::enum_ +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + +#include "pybind11/pybind11.h" // py::module_, py::enum_, py::arg #include // std::string @@ -16,11 +18,15 @@ namespace py = pybind11; void init_classification_types(py::module_ &m) { // bind enum class - py::enum_(m, "ClassificationType", "Enum class for all implemented multiclass classification strategies.") + py::enum_ py_enum(m, "ClassificationType", "Enum class for all implemented multiclass classification strategies."); + py_enum .value("OAA", plssvm::classification_type::oaa, "use the one vs. all classification strategy (default)") .value("OAO", plssvm::classification_type::oao, "use the one vs. 
one classification strategy"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // bind free functions - m.def("classification_type_to_full_string", &plssvm::classification_type_to_full_string, "convert the classification type to its full string representation"); - m.def("calculate_number_of_classifiers", &plssvm::calculate_number_of_classifiers, "given the classification strategy and number of classes , calculates the number of necessary classifiers"); + m.def("classification_type_to_full_string", &plssvm::classification_type_to_full_string, "convert the classification type to its full string representation", py::arg("classification")); + m.def("calculate_number_of_classifiers", &plssvm::calculate_number_of_classifiers, "given the classification strategy and number of classes , calculates the number of necessary classifiers", py::arg("classification"), py::arg("num_classes")); } diff --git a/bindings/Python/data_set/classification_data_set.cpp b/bindings/Python/data_set/classification_data_set.cpp index 89d62ebb3..1ffacb2e4 100644 --- a/bindings/Python/data_set/classification_data_set.cpp +++ b/bindings/Python/data_set/classification_data_set.cpp @@ -13,16 +13,18 @@ #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/file_format_types.hpp" // plssvm::file_format_type #include "plssvm/matrix.hpp" // plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper #include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper #include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/mpi_type_caster.hpp" 
// a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join #include "pybind11/numpy.h" // py::array_t, py::array -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::attribute_error +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::attribute_error #include "pybind11/pytypes.h" // py::type #include "pybind11/stl.h" // support for STL types @@ -38,54 +40,56 @@ void init_classification_data_set(py::module_ &m) { using plssvm::bindings::python::util::classification_data_set_wrapper; py::class_(m, "ClassificationDataSet", "Encapsulate all necessary data that is needed for training or predicting using an C-SVC.") - .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler) { + .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format, scaler.value())); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format, scaler.value())); } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format)); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format)); } } else { if (scaler.has_value()) { - return std::make_unique(plssvm::classification_data_set{ filename, format, scaler.value() }); + return std::make_unique(plssvm::classification_data_set{ 
std::move(comm), filename, format, scaler.value() }); } else { - return std::make_unique(plssvm::classification_data_set{ filename, format }); + return std::make_unique(plssvm::classification_data_set{ std::move(comm), filename, format }); } } }), "create a new data set from the provided file and additional optional parameters like the used label type", py::arg("filename"), - py::pos_only(), + py::kw_only(), py::arg("type") = std::nullopt, py::arg("format") = plssvm::file_format_type::libsvm, - py::arg("scaler") = std::nullopt) - .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler) { + py::arg("scaler") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data), scaler.value())); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data), scaler.value())); } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data))); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data))); } } else { if (scaler.has_value()) { - return std::make_unique(plssvm::classification_data_set{ std::move(data), scaler.value() }); + return std::make_unique(plssvm::classification_data_set{ std::move(comm), std::move(data), scaler.value() }); } else { - return std::make_unique(plssvm::classification_data_set{ std::move(data) }); + return std::make_unique(plssvm::classification_data_set{ std::move(comm), std::move(data) }); } } }), "create a new data set from the provided data and additional optional parameters like the used label type", py::arg("X"), - py::pos_only(), + py::kw_only(), 
py::arg("type") = std::nullopt, - py::arg("scaler") = std::nullopt) - .def(py::init([](plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional scaler) { + py::arg("scaler") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional scaler, plssvm::mpi::communicator comm) { return std::visit([&](auto &&labels_vector) { using label_type = typename plssvm::detail::remove_cvref_t::value_type; if (scaler.has_value()) { - return std::make_unique(plssvm::classification_data_set(std::move(data), std::move(labels_vector), scaler.value())); + return std::make_unique(plssvm::classification_data_set(std::move(comm), std::move(data), std::move(labels_vector), scaler.value())); } else { - return std::make_unique(plssvm::classification_data_set(std::move(data), std::move(labels_vector))); + return std::make_unique(plssvm::classification_data_set(std::move(comm), std::move(data), std::move(labels_vector))); } }, labels.labels); @@ -93,9 +97,10 @@ void init_classification_data_set(py::module_ &m) { "create a new data set from the provided data and labels and additional optional parameters", py::arg("X"), py::arg("y"), - py::pos_only(), - py::arg("scaler") = std::nullopt) - .def("save", [](const classification_data_set_wrapper &self, const std::string &filename, const plssvm::file_format_type format) { std::visit([&filename, format](auto &&data) { data.save(filename, format); }, self.data_set); }, "save the data set to a file using the provided file format type", py::arg("filename"), py::pos_only(), py::arg("format") = plssvm::file_format_type::libsvm) + py::kw_only(), + py::arg("scaler") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def("save", [](const classification_data_set_wrapper &self, const std::string &filename, const plssvm::file_format_type format) { std::visit([&filename, 
format](auto &&data) { data.save(filename, format); }, self.data_set); }, "save the data set to a file using the provided file format type", py::arg("filename"), py::kw_only(), py::arg("format") = plssvm::file_format_type::libsvm) .def("data", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return py::cast(data.data()); }, self.data_set); }, "the data saved as 2D vector") .def("has_labels", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.has_labels(); }, self.data_set); }, "check whether the data set has labels") // clang-format off @@ -127,6 +132,7 @@ void init_classification_data_set(py::module_ &m) { return plssvm::bindings::python::util::vector_to_pyarray(data.classes().value()); } }, self.data_set); }, "the number of classes") + .def("communicator", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.communicator(); }, self.data_set); }, "the associated MPI communicator") .def("__repr__", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { std::string optional_repr{}; diff --git a/bindings/Python/data_set/min_max_scaler.cpp b/bindings/Python/data_set/min_max_scaler.cpp index 478cb306f..5779b9e29 100644 --- a/bindings/Python/data_set/min_max_scaler.cpp +++ b/bindings/Python/data_set/min_max_scaler.cpp @@ -8,9 +8,11 @@ #include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler -#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::vector_to_pyarray +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::vector_to_pyarray #include "fmt/format.h" // 
fmt::format #include "pybind11/numpy.h" // py::array @@ -22,6 +24,7 @@ #include // std::size_t #include // std::optional, std::nullopt #include // std::string +#include // std::move namespace py = pybind11; @@ -43,20 +46,39 @@ void init_min_max_scaler(py::module_ &m) { // bind the plssvm::min_max_scaler class py::class_(m, "MinMaxScaler", "Implements all necessary data and functions needed for scaling a plssvm::data_set to an user-defined range [lower, upper].") - .def(py::init(), "create new scaling factors for the range [lower, upper]", py::arg("lower"), py::arg("upper")) - .def(py::init([](const std::array interval) { - return plssvm::min_max_scaler{ interval[0], interval[1] }; + .def(py::init([](const plssvm::real_type lower, const plssvm::real_type upper, plssvm::mpi::communicator comm) { + return plssvm::min_max_scaler{ std::move(comm), lower, upper }; }), - "create new scaling factors for the range [lower, upper]") - .def(py::init([](const py::tuple interval) { + "create new scaling factors for the range [lower, upper]", + py::arg("lower"), + py::arg("upper"), + py::kw_only(), + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const std::array interval, plssvm::mpi::communicator comm) { + return plssvm::min_max_scaler{ std::move(comm), interval[0], interval[1] }; + }), + "create new scaling factors for the range [lower, upper]", + py::arg("interval"), + py::kw_only(), + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const py::tuple interval, plssvm::mpi::communicator comm) { if (interval.size() != 2) { throw py::value_error{ fmt::format("MinMaxScaler can only be created from two interval values (lower, upper), but {} were provided!", interval.size()) }; } - return plssvm::min_max_scaler{ interval[0].cast(), interval[1].cast() }; + return plssvm::min_max_scaler{ std::move(comm), interval[0].cast(), interval[1].cast() }; + }), + "create new scaling factors for the range [lower, upper]", + py::arg("interval"), + 
py::kw_only(), + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const std::string &filename, plssvm::mpi::communicator comm) { + return plssvm::min_max_scaler{ std::move(comm), filename }; }), - "create new scaling factors for the range [lower, upper]") - .def(py::init(), "read the scaling factors from the file") - .def("save", &plssvm::min_max_scaler::save, "save the scaling factors to a file") + "read the scaling factors from the file", + py::arg("filename"), + py::kw_only(), + py::arg("comm") = plssvm::mpi::communicator{}) + .def("save", &plssvm::min_max_scaler::save, "save the scaling factors to a file", py::arg("filename")) .def("scaling_interval", &plssvm::min_max_scaler::scaling_interval, "the interval to which the data points are scaled") .def("scaling_factors", [](const plssvm::min_max_scaler &self) -> std::optional { const auto scaling_factors = self.scaling_factors(); @@ -65,6 +87,7 @@ void init_min_max_scaler(py::module_ &m) { } else { return std::nullopt; } }, "the scaling factors for each feature") + .def("communicator", &plssvm::min_max_scaler::communicator, "the associated MPI communicator") .def("__repr__", [](const plssvm::min_max_scaler &self) { std::string optional_repr{}; const auto scaling_factors = self.scaling_factors(); diff --git a/bindings/Python/data_set/regression_data_set.cpp b/bindings/Python/data_set/regression_data_set.cpp index 2cc304ede..c0369ee0d 100644 --- a/bindings/Python/data_set/regression_data_set.cpp +++ b/bindings/Python/data_set/regression_data_set.cpp @@ -13,16 +13,18 @@ #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/file_format_types.hpp" // plssvm::file_format_type #include "plssvm/matrix.hpp" // plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper #include 
"bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper #include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join #include "pybind11/numpy.h" // py::array_t, py::array -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::object, py::attribute_error +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::object, py::attribute_error #include "pybind11/pytypes.h" // py::type #include "pybind11/stl.h" // support for STL types @@ -38,54 +40,56 @@ void init_regression_data_set(py::module_ &m) { using plssvm::bindings::python::util::regression_data_set_wrapper; py::class_(m, "RegressionDataSet", "Encapsulate all necessary data that is needed for training or predicting using an C-SVR.") - .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler) { + .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format, scaler.value())); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format, scaler.value())); } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), 
filename, format)); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format)); } } else { if (scaler.has_value()) { - return std::make_unique(plssvm::regression_data_set{ filename, format, scaler.value() }); + return std::make_unique(plssvm::regression_data_set{ std::move(comm), filename, format, scaler.value() }); } else { - return std::make_unique(plssvm::regression_data_set{ filename, format }); + return std::make_unique(plssvm::regression_data_set{ std::move(comm), filename, format }); } } }), "create a new data set from the provided file and additional optional parameters", py::arg("filename"), - py::pos_only(), + py::kw_only(), py::arg("type") = std::nullopt, py::arg("format") = plssvm::file_format_type::libsvm, - py::arg("scaler") = std::nullopt) - .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler) { + py::arg("scaler") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data), scaler.value())); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data), scaler.value())); } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data))); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data))); } } else { if (scaler.has_value()) { - return std::make_unique(plssvm::regression_data_set{ std::move(data), scaler.value() }); + return std::make_unique(plssvm::regression_data_set{ std::move(comm), std::move(data), scaler.value() }); } else { - return std::make_unique(plssvm::regression_data_set{ 
std::move(data) }); + return std::make_unique(plssvm::regression_data_set{ std::move(comm), std::move(data) }); } } }), "create a new data set from the provided file and additional optional parameters", py::arg("X"), - py::pos_only(), + py::kw_only(), py::arg("type") = std::nullopt, - py::arg("scaler") = std::nullopt) - .def(py::init([](plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional scaler) { + py::arg("scaler") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional scaler, plssvm::mpi::communicator comm) { return std::visit([&](auto &&labels_vector) { using label_type = typename plssvm::detail::remove_cvref_t::value_type; if (scaler.has_value()) { - return std::make_unique(plssvm::regression_data_set(std::move(data), std::move(labels_vector), scaler.value())); + return std::make_unique(plssvm::regression_data_set(std::move(comm), std::move(data), std::move(labels_vector), scaler.value())); } else { - return std::make_unique(plssvm::regression_data_set(std::move(data), std::move(labels_vector))); + return std::make_unique(plssvm::regression_data_set(std::move(comm), std::move(data), std::move(labels_vector))); } }, labels.labels); @@ -93,9 +97,10 @@ void init_regression_data_set(py::module_ &m) { "create a new data set from the provided file and additional optional parameters", py::arg("X"), py::arg("y"), - py::pos_only(), - py::arg("scaler") = std::nullopt) - .def("save", [](const regression_data_set_wrapper &self, const std::string &filename, const plssvm::file_format_type format) { std::visit([&filename, format](auto &&data) { data.save(filename, format); }, self.data_set); }, "save the data set to a file using the provided file format type", py::arg("filename"), py::pos_only(), py::arg("format") = plssvm::file_format_type::libsvm) + py::kw_only(), + py::arg("scaler") = 
std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def("save", [](const regression_data_set_wrapper &self, const std::string &filename, const plssvm::file_format_type format) { std::visit([&filename, format](auto &&data) { data.save(filename, format); }, self.data_set); }, "save the data set to a file using the provided file format type", py::arg("filename"), py::kw_only(), py::arg("format") = plssvm::file_format_type::libsvm) .def("data", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return py::cast(data.data()); }, self.data_set); }, "the data saved as 2D vector") .def("has_labels", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.has_labels(); }, self.data_set); }, "check whether the data set has labels") // clang-format off @@ -118,6 +123,7 @@ void init_regression_data_set(py::module_ &m) { return data.scaling_factors().value(); } }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") // clang-format off + .def("communicator", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.communicator(); }, self.data_set); }, "the associated MPI communicator") .def("__repr__", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { std::string optional_repr{}; diff --git a/bindings/Python/data_set/variant_wrapper.hpp b/bindings/Python/data_set/variant_wrapper.hpp index 55f5fb8a5..46d853bf5 100644 --- a/bindings/Python/data_set/variant_wrapper.hpp +++ b/bindings/Python/data_set/variant_wrapper.hpp @@ -81,24 +81,18 @@ struct classification_data_set_wrapper { */ struct regression_data_set_wrapper { /// A std::variant containing all possible regression data set label types. 
- using possible_vector_types = std::variant, // np.int16 - std::vector, // np.uint16 - std::vector, // np.int32 - std::vector, // np.uint32 - std::vector, // np.int64 - std::vector, // np.uint64 - std::vector, // np.float32 - std::vector>; // np.float64 + using possible_vector_types = std::variant, // np.int16 + std::vector, // np.int32 + std::vector, // np.int64 + std::vector, // np.float32 + std::vector>; // np.float64 /// A std::variant containing all possible regression data set types. - using possible_data_set_types = std::variant, // np.int16 - plssvm::regression_data_set, // np.uint16 - plssvm::regression_data_set, // np.int32 - plssvm::regression_data_set, // np.uint32 - plssvm::regression_data_set, // np.int64 - plssvm::regression_data_set, // np.uint64 - plssvm::regression_data_set, // np.float32 - plssvm::regression_data_set>; // np.float64 + using possible_data_set_types = std::variant, // np.int16 + plssvm::regression_data_set, // np.int32 + plssvm::regression_data_set, // np.int64 + plssvm::regression_data_set, // np.float32 + plssvm::regression_data_set>; // np.float64 /** * @brief Construct a new regression data set by setting the active std::variant member. 
diff --git a/bindings/Python/detail/tracking/events.cpp b/bindings/Python/detail/tracking/events.cpp index 86f3ae161..d08cfe5de 100644 --- a/bindings/Python/detail/tracking/events.cpp +++ b/bindings/Python/detail/tracking/events.cpp @@ -18,15 +18,13 @@ namespace py = pybind11; void init_events(py::module_ &m) { // use a detail.tracking.PerformanceTracker submodule for the performance tracking bindings - py::module_ detail_module = m.def_submodule("detail", "a module containing detail functionality"); - py::module_ tracking_module = detail_module.def_submodule("tracking", "a module containing performance tracking and hardware sampling functionality"); - const py::module_ performance_tracker_module = tracking_module.def_submodule("PerformanceTracker"); + py::module_ tracking_module = m.def_submodule("performance_tracking", "a module containing performance tracking functionality"); using event_type = plssvm::detail::tracking::events::event; // bind a single event - py::class_(performance_tracker_module, "Event", "A class encapsulating a single event: name + timestamp where the event occurred.") - .def(py::init(), "construct a new event using a time point and a name") + py::class_(tracking_module, "Event", "A class encapsulating a single event: name + timestamp where the event occurred.") + .def(py::init(), "construct a new event using a time point and a name", py::arg("time_point"), py::arg("name")) .def_readonly("time_point", &event_type::time_point, "read the time point associated to this event") .def_readonly("name", &event_type::name, "read the name associated to this event") .def("__repr__", [](const event_type &self) { @@ -34,10 +32,10 @@ void init_events(py::module_ &m) { }); // bind the events wrapper - py::class_(performance_tracker_module, "Events", "A class encapsulating all occurred events.") + py::class_(tracking_module, "Events", "A class encapsulating all occurred events.") .def(py::init<>(), "construct an empty events wrapper") - .def("add_event", 
py::overload_cast(&plssvm::detail::tracking::events::add_event), "add a new event") - .def("add_event", py::overload_cast(&plssvm::detail::tracking::events::add_event), "add a new event using a time point and a name") + .def("add_event", py::overload_cast(&plssvm::detail::tracking::events::add_event), "add a new event", py::arg("event")) + .def("add_event", py::overload_cast(&plssvm::detail::tracking::events::add_event), "add a new event using a time point and a name", py::arg("time_point"), py::arg("name")) .def("at", &plssvm::detail::tracking::events::operator[], "get the i-th event") .def("num_events", &plssvm::detail::tracking::events::num_events, "get the number of events") .def("empty", &plssvm::detail::tracking::events::empty, "check whether there are any events") diff --git a/bindings/Python/detail/tracking/performance_tracker.cpp b/bindings/Python/detail/tracking/performance_tracker.cpp index 53abdc23c..dcf0f0616 100644 --- a/bindings/Python/detail/tracking/performance_tracker.cpp +++ b/bindings/Python/detail/tracking/performance_tracker.cpp @@ -22,25 +22,23 @@ namespace py = pybind11; void init_performance_tracker(py::module_ &m) { // use a detail.tracking.PerformanceTracker submodule for the performance tracking bindings - py::module_ detail_module = m.def_submodule("detail", "a module containing detail functionality"); - py::module_ tracking_module = detail_module.def_submodule("tracking", "a module containing performance tracking and hardware sampling functionality"); - py::module_ performance_tracker_module = tracking_module.def_submodule("PerformanceTracker"); + py::module_ tracking_module = m.def_submodule("performance_tracking", "a module containing performance tracking functionality"); // bind the performance tracker functions - performance_tracker_module + tracking_module // clang-format off .def("add_string_tracking_entry", [](const std::string &category, const std::string &name, const std::string &value) { 
plssvm::detail::tracking::global_performance_tracker().add_tracking_entry(plssvm::detail::tracking::tracking_entry{ category, name, value }); - }, "add a new generic string tracking entry") + }, "add a new generic string tracking entry", py::arg("category"), py::arg("name"), py::arg("value")) .def("add_parameter_tracking_entry", [](const plssvm::parameter ¶ms) { plssvm::detail::tracking::global_performance_tracker().add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "parameter", "", params }); - }, "add a new parameter tracking entry") + }, "add a new parameter tracking entry", py::arg("params")) // clang-format on - .def("add_event", [](const std::string &name) { plssvm::detail::tracking::global_performance_tracker().add_event(name); }, "add a new event") + .def("add_event", [](const std::string &name) { plssvm::detail::tracking::global_performance_tracker().add_event(name); }, "add a new event", py::arg("name")) .def("pause", []() { plssvm::detail::tracking::global_performance_tracker().pause_tracking(); }, "pause performance tracking") .def("resume", []() { plssvm::detail::tracking::global_performance_tracker().resume_tracking(); }, "resume performance tracking") - .def("save", [](const std::string &filename) { plssvm::detail::tracking::global_performance_tracker().save(filename); }, "save the performance tracking results to the specified yaml file") - .def("set_reference_time", [](const std::chrono::steady_clock::time_point time) { plssvm::detail::tracking::global_performance_tracker().set_reference_time(time); }, "set a new reference time") + .def("save", [](const std::string &filename) { plssvm::detail::tracking::global_performance_tracker().save(filename); }, "save the performance tracking results to the specified yaml file", py::arg("filename")) + .def("set_reference_time", [](const std::chrono::steady_clock::time_point time) { plssvm::detail::tracking::global_performance_tracker().set_reference_time(time); }, "set a new reference time", 
py::arg("reference_time")) .def("get_reference_time", []() { return plssvm::detail::tracking::global_performance_tracker().get_reference_time(); }, "get the current reference time") .def("is_tracking", []() { return plssvm::detail::tracking::global_performance_tracker().is_tracking(); }, "check whether performance tracking is currently enabled") .def("get_tracking_entries", []() { return plssvm::detail::tracking::global_performance_tracker().get_tracking_entries(); }, py::return_value_policy::reference, "retrieve all currently added tracking entries") diff --git a/bindings/Python/file_format_types.cpp b/bindings/Python/file_format_types.cpp index 4d50efedc..71a891da0 100644 --- a/bindings/Python/file_format_types.cpp +++ b/bindings/Python/file_format_types.cpp @@ -8,13 +8,19 @@ #include "plssvm/file_format_types.hpp" // plssvm::file_format_type +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + #include "pybind11/pybind11.h" // py::module_, py::enum_ namespace py = pybind11; void init_file_format_types(py::module_ &m) { // bind enum class - py::enum_(m, "FileFormatType", "Enum class for all supported file types.") + py::enum_ py_enum(m, "FileFormatType", "Enum class for all supported file types."); + py_enum .value("LIBSVM", plssvm::file_format_type::libsvm, "the LIBSVM file format (default); for the file format specification see: https://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html") .value("ARFF", plssvm::file_format_type::arff, "the ARFF file format; for the file format specification see: https://www.cs.waikato.ac.nz/~ml/weka/arff.html"); + + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); } diff --git a/bindings/Python/gamma.cpp b/bindings/Python/gamma.cpp index 36812224e..207c85745 100644 --- a/bindings/Python/gamma.cpp +++ b/bindings/Python/gamma.cpp @@ -12,6 +12,7 @@ #include "plssvm/matrix.hpp" // 
plssvm::aos_matrix #include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion #include "pybind11/pybind11.h" // py::module_, py::enum_ #include "pybind11/stl.h" // support for STL types: std::variant @@ -20,13 +21,21 @@ namespace py = pybind11; void init_gamma(py::module_ &m) { // bind enum class - py::enum_(m, "GammaCoefficientType", "Enum class for all possible gamma coefficient types (can also be a number).") + py::enum_ py_enum(m, "GammaCoefficientType", "Enum class for all possible gamma coefficient types (can also be a number)."); + py_enum .value("AUTOMATIC", plssvm::gamma_coefficient_type::automatic, "use a dynamic gamma value of 1 / num_features for the kernel functions") .value("SCALE", plssvm::gamma_coefficient_type::scale, "use a dynamic gamma value of 1 / (num_features * data.var()) for the kernel functions"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // bind free functions - m.def("get_gamma_string", &plssvm::get_gamma_string, "get the gamma string based on the currently active variant member"); - m.def("calculate_gamma_value", [](const plssvm::gamma_type &gamma, const plssvm::aos_matrix &data) { - return plssvm::calculate_gamma_value(gamma, data); - }); + m.def("get_gamma_string", &plssvm::get_gamma_string, "get the gamma string based on the currently active variant member", py::arg("gamma")); + m.def( + "calculate_gamma_value", [](const plssvm::gamma_type &gamma, const plssvm::aos_matrix &data) { + return plssvm::calculate_gamma_value(gamma, data); + }, + "get the real_type value of the provided gamma type", + py::arg("gamma"), + py::arg("matrix")); } diff --git a/bindings/Python/kernel_function_types.cpp b/bindings/Python/kernel_function_types.cpp index 32fee950b..ca8c45c51 100644 --- 
a/bindings/Python/kernel_function_types.cpp +++ b/bindings/Python/kernel_function_types.cpp @@ -8,13 +8,16 @@ #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "pybind11/pybind11.h" // py::module_, py::enum_ +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + +#include "pybind11/pybind11.h" // py::module_, py::enum_, py::arg namespace py = pybind11; void init_kernel_function_types(py::module_ &m) { // bind enum class - py::enum_(m, "KernelFunctionType", "Enum class for all implemented kernel functions in PLSSVM.") + py::enum_ py_enum(m, "KernelFunctionType", "Enum class for all implemented kernel functions in PLSSVM."); + py_enum .value("LINEAR", plssvm::kernel_function_type::linear, "linear kernel function: ") .value("POLYNOMIAL", plssvm::kernel_function_type::polynomial, "polynomial kernel function: (gamma * + coef0)^degree") .value("RBF", plssvm::kernel_function_type::rbf, "radial basis function: exp(-gamma * ||u - v||^2)") @@ -22,6 +25,9 @@ void init_kernel_function_types(py::module_ &m) { .value("LAPLACIAN", plssvm::kernel_function_type::laplacian, "laplacian kernel function: exp(-gamma * ||u - v||_1)") .value("CHI_SQUARED", plssvm::kernel_function_type::chi_squared, "chi-squared kernel function: exp(-gamma * sum_i (u[i] - v[i])^2 / (u[i] + v[i]))"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // bind free functions - m.def("kernel_function_type_to_math_string", &plssvm::kernel_function_type_to_math_string, "return the mathematical representation of a KernelFunctionType"); + m.def("kernel_function_type_to_math_string", &plssvm::kernel_function_type_to_math_string, "return the mathematical representation of a KernelFunctionType", py::arg("kernel_function")); } diff --git a/bindings/Python/kernel_functions.cpp b/bindings/Python/kernel_functions.cpp index 6342513f8..84d28430a 
100644 --- a/bindings/Python/kernel_functions.cpp +++ b/bindings/Python/kernel_functions.cpp @@ -9,71 +9,125 @@ #include "plssvm/kernel_functions.hpp" // plssvm::kernel_function #include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/gamma.hpp" // plssvm::gamma_coefficient_type, plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/parameter.hpp" // plssvm::parameter -#include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::arg +#include "pybind11/pybind11.h" // py::module_, py::arg, py::kw_only #include "pybind11/stl.h" // support for STL types: std::vector -#include // std::holds_alternative +#include // std::holds_alternative, std::get #include // std::vector namespace py = pybind11; void init_kernel_functions(py::module_ &m) { - m.def("linear_kernel_function", &plssvm::kernel_function, "apply the linear kernel function to two vectors"); + const plssvm::parameter default_params{}; + + m.def("linear_kernel_function", &plssvm::kernel_function, "apply the linear kernel function to two vectors", py::arg("x"), py::arg("y")); m.def( - "polynomial_kernel_function", [](const std::vector &x, const std::vector &y, const int degree, const plssvm::real_type gamma, const plssvm::real_type coef0) { - return plssvm::kernel_function(x, y, degree, gamma, coef0); + "polynomial_kernel_function", [](const std::vector &x, const std::vector &y, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0) { + if (std::holds_alternative(gamma)) { + return plssvm::kernel_function(x, y, degree, std::get(gamma), coef0); + } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + return plssvm::kernel_function(x, y, degree, plssvm::real_type{ 1.0 } / static_cast(x.size()), coef0); + } else { + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; + } }, "apply the polynomial kernel function to two vectors", py::arg("x"), py::arg("y"), - py::arg("degree"), - py::arg("gamma"), - py::arg("coef0")); + py::kw_only(), + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0); m.def( - "rbf_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::real_type gamma) { - return plssvm::kernel_function(x, y, gamma); + "rbf_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma) { + if (std::holds_alternative(gamma)) { + return plssvm::kernel_function(x, y, std::get(gamma)); + } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size())); + } else { + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; + } }, "apply the radial basis function kernel function to two vectors", py::arg("x"), py::arg("y"), - py::arg("gamma")); + py::kw_only(), + py::arg("gamma") = default_params.gamma); m.def( - "sigmoid_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::real_type gamma, const plssvm::real_type coef0) { - return plssvm::kernel_function(x, y, gamma, coef0); + "sigmoid_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma, const plssvm::real_type coef0) { + if (std::holds_alternative(gamma)) { + return plssvm::kernel_function(x, y, std::get(gamma), coef0); + } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size()), coef0); + } else { + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; + } }, "apply the sigmoid kernel function to two vectors", py::arg("x"), py::arg("y"), - py::arg("gamma"), - py::arg("coef0")); + py::kw_only(), + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0); m.def( - "laplacian_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::real_type gamma) { - return plssvm::kernel_function(x, y, gamma); + "laplacian_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma) { + if (std::holds_alternative(gamma)) { + return plssvm::kernel_function(x, y, std::get(gamma)); + } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size())); + } else { + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; + } }, "apply the laplacian kernel function to two vectors", py::arg("x"), py::arg("y"), - py::arg("gamma")); + py::kw_only(), + py::arg("gamma") = default_params.gamma); m.def( - "chi_squared_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::real_type gamma) { - return plssvm::kernel_function(x, y, gamma); + "chi_squared_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma) { + if (std::holds_alternative(gamma)) { + return plssvm::kernel_function(x, y, std::get(gamma)); + } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size())); + } else { + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; + } }, "apply the chi-squared kernel function to two vectors", py::arg("x"), py::arg("y"), - py::arg("gamma")); + py::kw_only(), + py::arg("gamma") = default_params.gamma); m.def( - "kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::parameter ¶ms) { - // check if params.gamma can be used -> must be a real_type! - if (params.kernel_type != plssvm::kernel_function_type::linear && !std::holds_alternative(params.gamma)) { - throw py::value_error{ fmt::format("In order to call 'kernel_function' the 'gamma' parameter must be a real_type, but is '{}'!", params.gamma) }; + "kernel_function", [](const std::vector &x, const std::vector &y, plssvm::parameter params) { + if (params.kernel_type == plssvm::kernel_function_type::linear) { + // gamma doesn't matter in the linear kernel function -> simply call the kernel + return plssvm::kernel_function(x, y, params); + } else if (std::holds_alternative(params.gamma)) { + // the gamma value matters, but already is a real_type -> simply call the kernel + return plssvm::kernel_function(x, y, params); + } else if (std::get(params.gamma) == plssvm::gamma_coefficient_type::automatic) { + // the gamma value matters and is automatic -> convert it to a real_type + params.gamma = plssvm::real_type{ 1.0 } / static_cast(x.size()); + return plssvm::kernel_function(x, y, params); + } else { + // the gamma value matters and is scale -> not supported + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; } - return plssvm::kernel_function(x, y, params); }, - "apply the kernel function defined in the parameter object to two vectors"); + "apply the kernel function defined in the parameter object to two vectors", + py::arg("x"), + py::arg("y"), + py::kw_only(), + py::arg("params") = default_params); + + m.def("get_gamma_type", []() { + return plssvm::gamma_type{ plssvm::gamma_coefficient_type::automatic }; + }); } diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp index 616a0dec0..777317de7 100644 --- a/bindings/Python/main.cpp +++ b/bindings/Python/main.cpp @@ -7,11 +7,16 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "plssvm/environment.hpp" // plssvm::environment::{initialize, finalize} -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/version/version.hpp" // plssvm::version::version +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED +#include "plssvm/environment.hpp" // plssvm::environment::{initialize, finalize} +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::is_executed_via_mpirun +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level +#include "plssvm/version/version.hpp" // plssvm::version::{version, major, minor, patch} -#include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_, py::exception, py::register_exception_translator +#include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_, py::exception, py::register_exception_translator, py::make_tuple #include "pybind11/pytypes.h" // py::set_error #include // std::exception_ptr, std::rethrow_exception @@ -30,19 +35,18 @@ void init_gamma(py::module_ &); void init_classification_types(py::module_ &); void init_file_format_types(py::module_ &); void 
init_kernel_function_types(py::module_ &); -void init_kernel_functions(py::module_ &); void init_parameter(py::module_ &); +void init_kernel_functions(py::module_ &); void init_classification_model(py::module_ &); void init_regression_model(py::module_ &); void init_min_max_scaler(py::module_ &); void init_classification_data_set(py::module_ &); void init_regression_data_set(py::module_ &); -void init_version(py::module_ &); void init_exceptions(py::module_ &, const py::exception &); void init_regression_report(py::module_ &); void init_csvm(py::module_ &); -void init_csvc(py::module_ &, py::module_ &); -void init_csvr(py::module_ &, py::module_ &); +void init_csvc(py::module_ &); +void init_csvr(py::module_ &); void init_openmp_csvm(py::module_ &, const py::exception &); void init_hpx_csvm(py::module_ &, const py::exception &); void init_stdpar_csvm(py::module_ &, const py::exception &); @@ -57,13 +61,37 @@ void init_sklearn_svr(py::module_ &); PYBIND11_MODULE(plssvm, m) { m.doc() = "PLSSVM - Parallel Least Squares Support Vector Machine"; m.attr("__version__") = plssvm::version::version; - - // create a pure-virtual module - py::module_ pure_virtual = m.def_submodule("__pure_virtual"); + m.attr("__version_info__") = py::make_tuple(plssvm::version::major, plssvm::version::minor, plssvm::version::patch); + m.attr("__has_mpi_support__") = PLSSVM_IS_DEFINED(PLSSVM_HAS_MPI_ENABLED); // automatically initialize the environments plssvm::environment::initialize(); + // issue a warning if PLSSVM was build without MPI support, but the Python code was run via mpirun +#if !defined(PLSSVM_HAS_MPI_ENABLED) + if (plssvm::mpi::is_executed_via_mpirun()) { + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + plssvm::mpi::communicator{}, + "WARNING: PLSSVM was built without MPI support, but is currently executed via mpirun! 
" + "As a result, each MPI process will run the same code.\n"); + } +#endif + + // issue a warning if PLSSVM was build with MPI support and mpi4py wasn't found, but the Python code was still run via mpirun +#if defined(PLSSVM_HAS_MPI_ENABLED) + if (plssvm::mpi::is_executed_via_mpirun()) { + try { + [[maybe_unused]] const py::module_ module = py::module_::import("mpi4py.MPI"); + // it worked + } catch (const py::error_already_set &) { + // error loading mpi4py -> issue the warning + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + plssvm::mpi::communicator{}, + "WARNING: PLSSVM was built without MPI support and the current code is executed via mpirun, but mpi4py wasn't found!\n"); + } + } +#endif + // automatically finalize the environments m.add_object("_cleanup", py::capsule([]() { plssvm::environment::finalize(); @@ -98,19 +126,18 @@ PYBIND11_MODULE(plssvm, m) { init_classification_types(m); init_file_format_types(m); init_kernel_function_types(m); - init_kernel_functions(m); init_parameter(m); + init_kernel_functions(m); init_classification_model(m); init_regression_model(m); init_min_max_scaler(m); init_classification_data_set(m); init_regression_data_set(m); - init_version(m); init_exceptions(m, base_exception); init_regression_report(m); - init_csvm(pure_virtual); - init_csvc(m, pure_virtual); - init_csvr(m, pure_virtual); + init_csvm(m); + init_csvc(m); + init_csvr(m); // init bindings for the specific backends ONLY if the backend has been enabled #if defined(PLSSVM_HAS_OPENMP_BACKEND) @@ -138,6 +165,7 @@ PYBIND11_MODULE(plssvm, m) { init_kokkos_csvm(m, base_exception); #endif - init_sklearn_svc(m); - init_sklearn_svr(m); + py::module_ sklearn_like_svm_model = m.def_submodule("svm", "a module containing the sklearn like SVC and SVR implementations"); + init_sklearn_svc(sklearn_like_svm_model); + init_sklearn_svr(sklearn_like_svm_model); } diff --git a/bindings/Python/model/classification_model.cpp 
b/bindings/Python/model/classification_model.cpp index b0ebcc45b..c6bf180f9 100644 --- a/bindings/Python/model/classification_model.cpp +++ b/bindings/Python/model/classification_model.cpp @@ -11,19 +11,22 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::array, py::list +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::array, py::list #include "pybind11/pytypes.h" // py::type #include "pybind11/stl.h" // support for STL types: std::vector #include // std::make_unique #include // std::optional, std::make_optional, std::nullopt #include // std::string +#include // std::move #include // std::visit namespace py = pybind11; @@ -32,18 +35,19 @@ void init_classification_model(py::module_ &m) { using plssvm::bindings::python::util::classification_model_wrapper; py::class_(m, "ClassificationModel", "Implements a class encapsulating the result of a call to the C-SVC fit function. 
A model is used to predict the labels of a new data set.") - .def(py::init([](const std::string &filename, const std::optional type) { + .def(py::init([](const std::string &filename, const std::optional type, plssvm::mpi::communicator comm) { if (type.has_value()) { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename)); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename)); } else { - return std::make_unique(plssvm::classification_model{ filename }); + return std::make_unique(plssvm::classification_model{ std::move(comm), filename }); } }), "load a previously learned classification model from a file", py::arg("filename"), - py::pos_only(), - py::arg("type") = std::nullopt) - .def("save", [](const classification_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file") + py::kw_only(), + py::arg("type") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def("save", [](const classification_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file", py::arg("filename")) .def("num_support_vectors", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_support_vectors(); }, self.model); }, "the number of support vectors (note: all training points become support vectors for LS-SVMs)") .def("num_features", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_features(); }, self.model); }, "the number of features of the support vectors") .def("get_params", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_params(); }, self.model); }, "the C-SVC hyper-parameters used to learn this 
model") @@ -71,6 +75,7 @@ void init_classification_model(py::module_ &m) { .def("classes", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.classes()); }, self.model); }, "the classes") .def("get_classification_type", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_classification_type(); }, self.model); }, "the classification type used to create this model") // clang-format off + .def("communicator", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.communicator(); }, self.model); }, "the associated MPI communicator") .def("__repr__", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { using label_type = typename plssvm::detail::remove_cvref_t::label_type; diff --git a/bindings/Python/model/regression_model.cpp b/bindings/Python/model/regression_model.cpp index 52989cdfd..5c22f6147 100644 --- a/bindings/Python/model/regression_model.cpp +++ b/bindings/Python/model/regression_model.cpp @@ -11,19 +11,22 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, 
create_instance, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::array, py::list +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::array, py::list #include "pybind11/pytypes.h" // py::type #include "pybind11/stl.h" // support for STL types: std::vector #include // std::make_unique #include // std::optional, std::make_optional, std::nullopt #include // std::string +#include // std::move #include // std::visit namespace py = pybind11; @@ -32,18 +35,19 @@ void init_regression_model(py::module_ &m) { using plssvm::bindings::python::util::regression_model_wrapper; py::class_(m, "RegressionModel", "Implements a class encapsulating the result of a call to the C-SVR fit function. A model is used to predict the labels of a new data set.") - .def(py::init([](const std::string &filename, const std::optional type) { + .def(py::init([](const std::string &filename, const std::optional type, plssvm::mpi::communicator comm) { if (type.has_value()) { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename)); + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename)); } else { - return std::make_unique(plssvm::regression_model{ filename }); + return std::make_unique(plssvm::regression_model{ std::move(comm), filename }); } }), "load a previously learned regression model from a file", py::arg("filename"), - py::pos_only(), - py::arg("type") = std::nullopt) - .def("save", [](const regression_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file") + py::kw_only(), + py::arg("type") = std::nullopt, + py::arg("comm") = plssvm::mpi::communicator{}) + .def("save", [](const 
regression_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file", py::arg("filename")) .def("num_support_vectors", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_support_vectors(); }, self.model); }, "the number of support vectors (note: all training points become support vectors for LS-SVMs)") .def("num_features", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_features(); }, self.model); }, "the number of features of the support vectors") .def("get_params", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_params(); }, self.model); }, "the C-SVR hyper-parameters used to learn this model") @@ -68,6 +72,7 @@ void init_regression_model(py::module_ &m) { // clang-format on .def("rho", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.rho()); }, self.model); }, "the bias value after learning") // clang-format off + .def("communicator", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.communicator(); }, self.model); }, "the associated MPI communicator") .def("__repr__", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { using label_type = typename plssvm::detail::remove_cvref_t::label_type; diff --git a/bindings/Python/model/variant_wrapper.hpp b/bindings/Python/model/variant_wrapper.hpp index ab5a063a5..908a058af 100644 --- a/bindings/Python/model/variant_wrapper.hpp +++ b/bindings/Python/model/variant_wrapper.hpp @@ -66,14 +66,11 @@ struct classification_model_wrapper { */ struct regression_model_wrapper { /// A std::variant containing all possible regression model types. 
- using possible_model_types = std::variant, // np.int16 - plssvm::regression_model, // np.uint16 - plssvm::regression_model, // np.int32 - plssvm::regression_model, // np.uint32 - plssvm::regression_model, // np.int64 - plssvm::regression_model, // np.uint64 - plssvm::regression_model, // np.float32 - plssvm::regression_model>; // np.float64 + using possible_model_types = std::variant, // np.int16 + plssvm::regression_model, // np.int32 + plssvm::regression_model, // np.int64 + plssvm::regression_model, // np.float32 + plssvm::regression_model>; // np.float64 /** * @brief Construct a new regression model by setting the active std::variant member. diff --git a/bindings/Python/parameter.cpp b/bindings/Python/parameter.cpp index 6868f18d6..bd419e06f 100644 --- a/bindings/Python/parameter.cpp +++ b/bindings/Python/parameter.cpp @@ -12,8 +12,6 @@ #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter} - #include "fmt/format.h" // fmt::format #include "pybind11/operators.h" // support for operators #include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::return_value_policy, py::self @@ -22,17 +20,19 @@ namespace py = pybind11; void init_parameter(py::module_ &m) { + const plssvm::parameter default_params{}; + // bind parameter class py::class_(m, "Parameter", "A class for encapsulating all important C-SVM hyper-parameters.") - .def(py::init<>(), "default construct all hyper-parameters") - .def(py::init(), "create a new Parameter object providing all hyper-parameters explicitly") - .def(py::init([](const py::kwargs &args) { - // check for valid keys - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value named parameter is provided, set the respective value - 
return plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + .def(py::init([](const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost) { + return plssvm::parameter{ kernel_type, degree, gamma, coef0, cost }; }), - "create a new Parameter object with the optionally provided hyper-parameter values") + "create a new Parameter object with the optionally provided hyper-parameter values", + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost) .def_property( "kernel_type", [](const plssvm::parameter &self) { return self.kernel_type; }, diff --git a/bindings/Python/regression_report.cpp b/bindings/Python/regression_report.cpp index fa4933a25..55dabfcb9 100644 --- a/bindings/Python/regression_report.cpp +++ b/bindings/Python/regression_report.cpp @@ -14,7 +14,7 @@ #include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::init, py::arg, py::pos_only, py::value_error +#include "pybind11/pybind11.h" // py::module_, py::init, py::arg, py::kw_only, py::value_error #include "pybind11/pytypes.h" // py::object #include "pybind11/stl.h" // support for STL types @@ -52,6 +52,5 @@ void init_regression_report(py::module_ &m) { } else { return py::str(fmt::format("{}", report)); } - }, - y_true.labels); }, "create a new regression report by calculating all metrics between the correct and predicted labels", py::arg("y_true"), py::arg("y_pred"), py::pos_only(), py::arg("force_finite") = true, py::arg("output_dict") = false); + }, y_true.labels); }, "create a new regression report by calculating all metrics 
between the correct and predicted labels", py::arg("y_true"), py::arg("y_pred"), py::kw_only(), py::arg("force_finite") = true, py::arg("output_dict") = false); } diff --git a/bindings/Python/sklearn_svc.cpp b/bindings/Python/sklearn_like/svc.cpp similarity index 69% rename from bindings/Python/sklearn_svc.cpp rename to bindings/Python/sklearn_like/svc.cpp index 63c4238ea..0c1e53f2e 100644 --- a/bindings/Python/sklearn_svc.cpp +++ b/bindings/Python/sklearn_like/svc.cpp @@ -10,6 +10,7 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/csvm_factory.hpp" // plssvm::make_csvc #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/gamma.hpp" // plssvm::gamma_coefficient_type, plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type @@ -24,13 +25,15 @@ #include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::label_vector_wrapper #include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix #include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_gamma_kwarg_to_variant, vector_to_pyarray} +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join +#include "pybind11/cast.h" // py::cast #include "pybind11/numpy.h" // support for STL types #include "pybind11/operators.h" // support for operators #include "pybind11/pybind11.h" // py::module_, py::class_, 
py::init, py::arg, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error +#include "pybind11/pytypes.h" // py::dict, py::kwargs, py::str #include "pybind11/stl.h" // support for STL types #include // std::fill @@ -50,6 +53,25 @@ namespace py = pybind11; // TODO: implement missing functionality (as far es possible) +/* + * Currently missing: + * - shrinking constructor parameter (makes no sense for LS-SVMs) + * - probability constructor parameter (needs Platt scaling -> complex) + * - cache_size constructor parameter (not applicable in PLSSVM) + * - class_weight constructor parameter + * - break_ties constructor parameter + * - random_state constructor parameter (needed for probability estimates) + * - dual_coef_ attribute + * - probA_ attribute (needed for probability estimates) + * - probB_ attribute (needed for probability estimates) + * - get_metadata_routing function (no idea how to implement this function) + * - predict_log_proba function (needed for probability estimates) + * - predict_proba function (needed for probability estimates) + * - set_fit_request function (no idea how to implement this function) + * - set_score_request function (no idea how to implement this function) + * - sample_weight parameter for the fit function + * - sample_weight parameter for the score function + */ // dummy struct svc { @@ -57,6 +79,25 @@ struct svc { using possible_data_set_types = typename plssvm::bindings::python::util::classification_data_set_wrapper::possible_data_set_types; using possible_model_types = typename plssvm::bindings::python::util::classification_model_wrapper::possible_model_types; + /** + * @brief Construct a default svc wrapper doing nothing. + */ + svc() : + svm_{ plssvm::make_csvc(plssvm::gamma = plssvm::gamma_coefficient_type::scale) } { } + + /** + * @brief Construct a new svc wrapper with the provided parameters. 
+ * @param[in] params the SVM hyper-parameters + * @param[in] epsilon the epsilon value for the CG termination criterion + * @param[in] max_iter the maximum number of CG iterations + * @param[in] classification the classfication type (or decision function shape) + */ + svc(const plssvm::parameter params, const plssvm::real_type epsilon, const std::optional max_iter, const plssvm::classification_type classification) : + svm_{ plssvm::make_csvc(params) }, + epsilon_{ epsilon }, + max_iter_{ max_iter }, + classification_{ classification } { } + /** * @brief Wrapper function to call the private (friendship) predict_values function. * @tparam Args the types of the parameter used for calling the predict_values function @@ -64,7 +105,8 @@ struct svc { * @return the predicted values (`[[nodiscard]]`) */ template - auto call_predict_values(Args &&...args) const { + [[nodiscard]] auto call_predict_values(Args &&...args) const { + PLSSVM_ASSERT(svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); return svm_->predict_values(std::forward(args)...); } @@ -106,243 +148,130 @@ struct svc { * @return a Python dictionary containing the used parameter (`[[nodiscard]]`) */ [[nodiscard]] py::dict get_params(const bool) const { + PLSSVM_ASSERT(svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); const plssvm::parameter params = svm_->get_params(); // fill a Python dictionary with the supported keys and values py::dict py_params; py_params["C"] = params.cost; - py_params["break_ties"] = false; - py_params["cache_size"] = 0; - py_params["class_weight"] = py::none(); + // py_params["break_ties"] = false; + // py_params["cache_size"] = 0; + // py_params["class_weight"] = py::none{}; py_params["coef0"] = params.coef0; py_params["decision_function_shape"] = classification_ == plssvm::classification_type::oaa ? 
"ovr" : "ovo"; py_params["degree"] = params.degree; if (std::holds_alternative(params.gamma)) { py_params["gamma"] = std::get(params.gamma); } else { - switch (std::get(params.gamma)) { - case plssvm::gamma_coefficient_type::automatic: - py_params["gamma"] = "auto"; - break; - case plssvm::gamma_coefficient_type::scale: - py_params["gamma"] = "scale"; - break; - } + // can't use this for both or the numeric value would also be interpreted as a string like '0.001' + py_params["gamma"] = fmt::format("{}", params.gamma); } py_params["kernel"] = fmt::format("{}", params.kernel_type); py_params["max_iter"] = max_iter_.has_value() ? static_cast(max_iter_.value()) : -1; - py_params["probability"] = false; - py_params["random_state"] = py::none(); - py_params["shrinking"] = false; - py_params["tol"] = epsilon_.value_or(plssvm::real_type{ 1e-10 }); + // py_params["probability"] = false; + // py_params["random_state"] = py::none{}; + // py_params["shrinking"] = false; + py_params["tol"] = epsilon_; py_params["verbose"] = plssvm::verbosity != plssvm::verbosity_level::quiet; return py_params; } - py::dtype py_dtype_{}; - std::optional epsilon_{}; - std::optional max_iter_{}; - plssvm::classification_type classification_{ plssvm::classification_type::oaa }; - - std::unique_ptr svm_ = plssvm::make_csvc(plssvm::gamma = plssvm::gamma_coefficient_type::scale); - std::unique_ptr data_{}; - std::unique_ptr model_{}; - - std::optional> feature_names_{}; -}; - -namespace { + /** + * @brief Calculate the support vector indices per class. + * @return the support vector indices (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector calculate_sv_indices_per_class() const { + PLSSVM_ASSERT(model_ != nullptr, "model_ may not be a nullptr! 
Maybe you forgot to initialize it?"); -void parse_provided_kwargs(svc &self, const py::kwargs &args) { - // check keyword arguments - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "probability", "tol", "cache_size", "class_weight", "verbose", "max_iter", "decision_function_shape", "break_ties", "random_state" }); + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; - if (args.contains("C")) { - self.svm_->set_params(plssvm::cost = args["C"].cast()); - } - if (args.contains("kernel")) { - const auto kernel_str = args["kernel"].cast(); - plssvm::kernel_function_type kernel{}; - if (kernel_str == "linear") { - kernel = plssvm::kernel_function_type::linear; - } else if (kernel_str == "poly" || kernel_str == "polynomial") { - kernel = plssvm::kernel_function_type::polynomial; - } else if (kernel_str == "rbf") { - kernel = plssvm::kernel_function_type::rbf; - } else if (kernel_str == "sigmoid") { - kernel = plssvm::kernel_function_type::sigmoid; - } else if (kernel_str == "laplacian") { - kernel = plssvm::kernel_function_type::laplacian; - } else if (kernel_str == "chi_squared" || kernel_str == "chi-squared") { - kernel = plssvm::kernel_function_type::chi_squared; - } else if (kernel_str == "precomputed") { - throw py::value_error{ R"(The "kernel = 'precomputed'" parameter for the 'SVC' is not implemented yet!)" }; - } else { - throw py::value_error{ fmt::format("'{}' is not in list", kernel_str) }; - } - self.svm_->set_params(plssvm::kernel_type = kernel); - } - if (args.contains("degree")) { - self.svm_->set_params(plssvm::degree = args["degree"].cast()); - } - if (args.contains("gamma")) { - const plssvm::gamma_type gamma = plssvm::bindings::python::util::convert_gamma_kwarg_to_variant(args); - if (std::holds_alternative(gamma)) { - self.svm_->set_params(plssvm::gamma = std::get(gamma)); - } else { - 
self.svm_->set_params(plssvm::gamma = std::get(gamma)); - } - } - if (args.contains("coef0")) { - self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); - } - if (args.contains("shrinking")) { - throw py::value_error{ "The 'shrinking' parameter for the 'SVC' is not implemented and makes no sense for a LS-SVM!" }; - } - if (args.contains("probability")) { - throw py::value_error{ "The 'probability' parameter for the 'SVC' is not implemented yet!" }; - } - if (args.contains("tol")) { - self.epsilon_ = args["tol"].cast(); - } - if (args.contains("cache_size")) { - throw py::value_error{ "The 'cache_size' parameter for the 'SVC' is not implemented and makes no sense for our PLSSVM implementation!" }; - } - if (args.contains("class_weight")) { - throw py::value_error{ "The 'class_weight' parameter for the 'SVC' is not implemented yet!" }; - } - if (args.contains("verbose")) { - if (args["verbose"].cast()) { - if (plssvm::verbosity == plssvm::verbosity_level::quiet) { - // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output - plssvm::verbosity = plssvm::verbosity_level::full; + std::map> indices_per_class{}; + // init index-map map + for (const label_type &label : model.classes()) { + indices_per_class.insert({ label, std::vector{} }); } - // otherwise: use currently active verbosity level - } else { - plssvm::verbosity = plssvm::verbosity_level::quiet; - } - } - if (args.contains("max_iter")) { - const auto max_iter = args["max_iter"].cast(); - if (max_iter > 0) { - // use provided value - self.max_iter_ = static_cast(max_iter); - } else if (max_iter == -1) { - // default behavior in PLSSVM -> do nothing - } else { - // invalid max_iter provided - throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; - } - } - if (args.contains("decision_function_shape")) { - const std::string &dfs = args["decision_function_shape"].cast(); - if (dfs == "ovo") { - 
self.classification_ = plssvm::classification_type::oao; - } else if (dfs == "ovr") { - self.classification_ = plssvm::classification_type::oaa; - } else { - throw py::value_error{ fmt::format("decision_function_shape must be either 'ovr' or 'ovo', got {}.", dfs) }; - } - } - if (args.contains("break_ties")) { - throw py::value_error{ "The 'break_ties' parameter for the 'SVC' is not implemented yet!" }; - } - if (args.contains("random_state")) { - throw py::value_error{ "The 'random_state' parameter for the 'SVC' is not implemented yet!" }; - } -} - -void fit(svc &self) { - // perform sanity checks - if (self.svm_->get_params().cost <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ "C <= 0" }; - } - if (self.svm_->get_params().degree < 0) { - throw py::value_error{ "degree of polynomial kernel < 0" }; - } - if (self.epsilon_.has_value() && self.epsilon_.value() <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ "eps <= 0" }; + // sort the indices into the respective bucket based on their associated class + for (std::size_t idx = 0; idx < model.num_support_vectors(); ++idx) { + indices_per_class[model.labels()->get()[idx]].push_back(static_cast(idx)); + } + // convert map values to vector + std::vector support{}; + support.reserve(model.num_support_vectors()); + for (const auto &[label, indices] : indices_per_class) { + support.insert(support.cend(), indices.cbegin(), indices.cend()); + } + return support; + }, + *model_); } - // fit the model using potentially provided keyword arguments - std::visit([&](auto &&data) { - using possible_model_types = typename svc::possible_model_types; - - if (self.epsilon_.has_value() && self.max_iter_.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(data, - plssvm::classification = self.classification_, - plssvm::epsilon = self.epsilon_.value(), - plssvm::max_iter = self.max_iter_.value())); - } else if (self.epsilon_.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(data, - 
plssvm::classification = self.classification_, - plssvm::epsilon = self.epsilon_.value())); - } else if (self.max_iter_.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(data, - plssvm::classification = self.classification_, - plssvm::max_iter = self.max_iter_.value())); - } else { - self.model_ = std::make_unique(self.svm_->fit(data, - plssvm::classification = self.classification_)); - } - }, - *self.data_); -} - -template -[[nodiscard]] std::vector calculate_sv_indices_per_class(const svc &self) { - return std::visit([&](auto &&model) { - using label_type = typename plssvm::detail::remove_cvref_t::label_type; + /// Pointer to the the stored PLSSVM C-SVC instance. + std::unique_ptr svm_{}; + /// The CG termination criterion if provided. + plssvm::real_type epsilon_{}; + /// The maximum number of CG iterations if provided. + std::optional max_iter_{}; + /// The used classification type (or decision function shape). + plssvm::classification_type classification_{}; - std::map> indices_per_class{}; - // init index-map map - for (const label_type &label : model.classes()) { - indices_per_class.insert({ label, std::vector{} }); - } - // sort the indices into the respective bucket based on their associated class - for (std::size_t idx = 0; idx < model.num_support_vectors(); ++idx) { - indices_per_class[model.labels()->get()[idx]].push_back(static_cast(idx)); - } - // convert map values to vector - std::vector support{}; - support.reserve(model.num_support_vectors()); - for (const auto &[label, indices] : indices_per_class) { - support.insert(support.cend(), indices.cbegin(), indices.cend()); - } - return support; - }, - *self.model_); -} + /// The data type of the labels. + py::dtype py_dtype_{}; + /// Pointer to the classification data set wrapper (represents data sets with all possible label types). + std::unique_ptr data_{}; + /// Pointer to the classification model wrapper (represents models with all possible label types). 
+ std::unique_ptr model_{}; -} // namespace + /// The name of the features. Can only be provided via a Pandas DataFrame. + std::optional> feature_names_{}; +}; void init_sklearn_svc(py::module_ &m) { // documentation based on sklearn.svm.SVC documentation py::class_ py_svc(m, "SVC", py::dynamic_attr(), "A C-SVC implementation adhering to sklearn.svm.SVC using PLSSVM as backend."); - py_svc.def(py::init([](const py::kwargs &args) { - // to silence constructor messages - if (args.contains("verbose")) { - if (args["verbose"].cast()) { - if (plssvm::verbosity == plssvm::verbosity_level::quiet) { - // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output - plssvm::verbosity = plssvm::verbosity_level::full; - } - // otherwise: use currently active verbosity level - } else { - plssvm::verbosity = plssvm::verbosity_level::quiet; + py_svc.def(py::init([](const plssvm::real_type C, const plssvm::kernel_function_type kernel, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type tol, const bool verbose, const long long max_iter, const plssvm::classification_type decision_function_shape) { + // sanity check parameters + if (max_iter < -1) { + throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; + } + + // set verbosity + if (verbose) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; } + // otherwise: use currently active verbosity level } else { - // sklearn default is quiet plssvm::verbosity = plssvm::verbosity_level::quiet; } - // create SVC class - auto self = std::make_unique(); - parse_provided_kwargs(*self, args); - return self; + // create plssvm::parameter struct + const plssvm::parameter params{ kernel, degree, gamma, 
coef0, C }; + // we use an unsigned type for max_iter -> convert it to an optional to support -1 + const std::optional used_max_iter = max_iter == -1 ? std::nullopt : std::make_optional(static_cast(max_iter)); + // create SVC wrapper + return svc{ params, tol, used_max_iter, decision_function_shape }; }), - "Construct a new SVC classifier."); + "Construct a new SVC classifier.", + py::kw_only(), + py::arg("C") = 1.0, + py::arg("kernel") = plssvm::kernel_function_type::rbf, + py::arg("degree") = 3, + py::arg("gamma") = plssvm::gamma_coefficient_type::scale, + py::arg("coef0") = 0.0, + // py::arg("shrinking") = true, + // py::arg("probability") = false, + py::arg("tol") = 1e-10, + // py::arg("cache_size") = 200, + // py::arg("class_weight") = py::none{}, + py::arg("verbose") = false, + py::arg("max_iter") = -1, + py::arg("decision_function_shape") = plssvm::classification_type::oaa + // py::arg("break_ties") = false, + // py::arg("random_state") = py::none{} + ); //*************************************************************************************************************************************// // ATTRIBUTES // @@ -361,6 +290,7 @@ void init_sklearn_svc(py::module_ &m) { std::fill(ptr, ptr + size, plssvm::real_type{ 1.0 }); return py_array; }, "Multipliers of parameter C for each class. ndarray of shape (n_classes,)") .def_property_readonly("classes_", [](const svc &self) -> py::array { + PLSSVM_ASSERT(self.data_ != nullptr, "data_ may not be a nullptr! Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "'SVC' object has no attribute 'classes_'" }; } @@ -369,6 +299,7 @@ void init_sklearn_svc(py::module_ &m) { return plssvm::bindings::python::util::vector_to_pyarray(data.classes().value()); }, *self.data_); }, "The classes labels. ndarray of shape (n_classes,)") .def_property_readonly("coef_", [](const svc &self) -> py::array { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! 
Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "'SVC' object has no attribute 'coef_'" }; } @@ -409,12 +340,13 @@ void init_sklearn_svc(py::module_ &m) { return plssvm::bindings::python::util::vector_to_pyarray(rho); }, *self.model_); }, "Constants in decision function. ovo: ndarray of shape (n_classes * (n_classes - 1) / 2,). ovr: ndarray of shape (n_classes,)") .def_property_readonly("n_features_in_", [](const svc &self) -> int { + PLSSVM_ASSERT(self.data_ != nullptr, "data_ may not be a nullptr! Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "'SVC' object has no attribute 'n_features_in_'" }; } return static_cast(std::visit([](auto &&data) { return data.num_features(); }, *self.data_)); }, "Number of features seen during fit. int") - .def_property_readonly("feature_names_in_", [](const svc &self) { + .def_property_readonly("feature_names_in_", [](const svc &self) -> py::array { if (!self.feature_names_.has_value()) { throw py::attribute_error{ "'SVC' object has no attribute 'feature_names_in_'" }; } @@ -431,14 +363,14 @@ void init_sklearn_svc(py::module_ &m) { throw py::attribute_error{ "'SVC' object has no attribute 'support_'" }; } - return plssvm::bindings::python::util::vector_to_pyarray(calculate_sv_indices_per_class(self)); }, "Indices of support vectors. ndarray of shape (n_SV)") + return plssvm::bindings::python::util::vector_to_pyarray(self.calculate_sv_indices_per_class()); }, "Indices of support vectors. 
ndarray of shape (n_SV)") .def_property_readonly("support_vectors_", [](const svc &self) -> py::array { if (self.model_ == nullptr) { throw py::attribute_error{ "'SVC' object has no attribute 'support_vectors_'" }; } // get the sorted indices - const std::vector support = calculate_sv_indices_per_class(self); + const std::vector support = self.calculate_sv_indices_per_class(); // convert support vectors matrix to 2d vector std::vector> sv = std::visit([](auto &&model) { return model.support_vectors().to_2D_vector(); }, *self.model_); @@ -480,6 +412,7 @@ void init_sklearn_svc(py::module_ &m) { .def_property_readonly("probA_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'probA_' (not implemented)" }; }, "Parameter learned in Platt scaling when probability=True. ndarray of shape (n_classes * (n_classes - 1) / 2)") .def_property_readonly("probB_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'probB_' (not implemented)" }; }, "Parameter learned in Platt scaling when probability=True. ndarray of shape (n_classes * (n_classes - 1) / 2)") .def_property_readonly("shape_fit_", [](const svc &self) { + PLSSVM_ASSERT(self.data_ != nullptr, "data_ may not be a nullptr! Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "'SVC' object has no attribute 'shape_fit_'" }; } @@ -495,7 +428,6 @@ void init_sklearn_svc(py::module_ &m) { if (self.model_ == nullptr) { throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; } - return std::visit([&](auto &&model) -> py::array { switch (self.classification_) { case plssvm::classification_type::oaa: @@ -507,7 +439,8 @@ void init_sklearn_svc(py::module_ &m) { plssvm::soa_matrix w{}; // empty -> no need to befriend the model class! 
// predict values using OAA -> num_data_points x num_classes - const plssvm::aos_matrix votes = self.call_predict_values(params, sv, alpha, rho, w, predict_points); + // note: must not be const or the custom type_caster won't kick in + plssvm::aos_matrix votes = self.call_predict_values(params, sv, alpha, rho, w, predict_points); // special case for binary classification if (model.num_classes() == 2) { @@ -601,6 +534,7 @@ void init_sklearn_svc(py::module_ &m) { return py::array{}; }, *self.model_); }, "Evaluate the decision function for the samples in X.") .def("fit", [](svc &self, plssvm::bindings::python::util::soa_matrix_wrapper data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> svc & { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); // sanity check parameter if (sample_weight.has_value()) { throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" 
}; @@ -617,17 +551,33 @@ void init_sklearn_svc(py::module_ &m) { // get the label type and possible data set types using label_type = typename plssvm::detail::remove_cvref_t::value_type; using possible_data_set_types = typename svc::possible_data_set_types; + using possible_model_types = typename svc::possible_model_types; + // create the data set to fit - self.data_ = std::make_unique(plssvm::classification_data_set(std::move(data.matrix), std::move(labels_vector))); + plssvm::classification_data_set train_data{ std::move(data.matrix), std::move(labels_vector) }; + + // fit the model + if (self.max_iter_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(train_data, + plssvm::epsilon = self.epsilon_, + plssvm::classification = self.classification_, + plssvm::max_iter = self.max_iter_.value())); + } else { + self.model_ = std::make_unique(self.svm_->fit(train_data, + plssvm::epsilon = self.epsilon_, + plssvm::classification = self.classification_)); + } + + // store data set internally + self.data_ = std::make_unique(std::move(train_data)); }, - labels.labels); + labels.labels); - // fit the model - fit(self); - return self; }, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt, py::return_value_policy::reference) + return self; }, py::return_value_policy::reference, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) .def("get_metadata_routing", [](const svc &) { throw py::attribute_error{ "'SVC' object has no function 'get_metadata_routing' (not implemented)" }; }, "Get metadata routing of this object.") .def("get_params", &svc::get_params, "Get parameters for this estimator.", py::arg("deep") = true) .def("predict", [](svc &self, plssvm::soa_matrix data) -> py::array { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! 
Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; } @@ -639,10 +589,11 @@ void init_sklearn_svc(py::module_ &m) { const plssvm::classification_data_set data_to_predict{ std::move(data) }; // predict the data return plssvm::bindings::python::util::vector_to_pyarray(self.svm_->predict(model, data_to_predict)); - }, *self.model_); }, "Perform classification on samples in X.") - .def("predict_log_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_log_proba' (not implemented)" }; }, "Compute log probabilities of possible outcomes for samples in X.") - .def("predict_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_proba' (not implemented)" }; }, "Compute probabilities of possible outcomes for samples in X.") + }, *self.model_); }, "Perform classification on samples in X.", py::arg("X")) + .def("predict_log_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_log_proba' (not implemented)" }; }, "Compute log probabilities of possible outcomes for samples in X.", py::arg("X")) + .def("predict_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_proba' (not implemented)" }; }, "Compute probabilities of possible outcomes for samples in X.", py::arg("X")) .def("score", [](svc &self, plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> plssvm::real_type { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); // sanity check parameter if (sample_weight.has_value()) { throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" 
}; @@ -666,11 +617,78 @@ void init_sklearn_svc(py::module_ &m) { }, labels.labels); }, "Return the mean accuracy on the given test data and labels.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) .def("set_fit_request", [](const svc &) { throw py::attribute_error{ "'SVC' object has no function 'set_fit_request' (not implemented)" }; }, "Request metadata passed to the fit method.") .def("set_params", [](svc &self, const py::kwargs &args) -> svc & { - parse_provided_kwargs(self, args); - return self; }, "Set the parameters of this estimator.", py::return_value_policy::reference) + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "probability", "tol", "cache_size", "class_weight", "verbose", "max_iter", "decision_function_shape", "break_ties", "random_state" }); + + if (args.contains("C")) { + self.svm_->set_params(plssvm::cost = args["C"].cast()); + } + if (args.contains("kernel")) { + self.svm_->set_params(plssvm::kernel_type = args["kernel"].cast()); + } + if (args.contains("degree")) { + self.svm_->set_params(plssvm::degree = args["degree"].cast()); + } + if (args.contains("gamma")) { + self.svm_->set_params(plssvm::gamma = args["gamma"].cast()); + } + if (args.contains("coef0")) { + self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); + } + if (args.contains("shrinking")) { + throw py::value_error{ "The 'shrinking' parameter for the 'SVC' is not implemented and makes no sense for a LS-SVM!" }; + } + if (args.contains("probability")) { + throw py::value_error{ "The 'probability' parameter for the 'SVC' is not implemented yet!" 
}; + } + if (args.contains("tol")) { + self.epsilon_ = args["tol"].cast(); + } + if (args.contains("cache_size")) { + throw py::value_error{ "The 'cache_size' parameter for the 'SVC' is not implemented and makes no sense for our PLSSVM implementation!" }; + } + if (args.contains("class_weight")) { + throw py::value_error{ "The 'class_weight' parameter for the 'SVC' is not implemented yet!" }; + } + if (args.contains("verbose")) { + if (args["verbose"].cast()) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; + } + // otherwise: use currently active verbosity level + } else { + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + } + if (args.contains("max_iter")) { + const auto max_iter = args["max_iter"].cast(); + if (max_iter > 0) { + // use provided value + self.max_iter_ = static_cast(max_iter); + } else if (max_iter == -1) { + // default behavior in PLSSVM + self.max_iter_ = std::nullopt; + } else { + // invalid max_iter provided + throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; + } + } + if (args.contains("decision_function_shape")) { + self.classification_ = args["decision_function_shape"].cast(); + } + if (args.contains("break_ties")) { + throw py::value_error{ "The 'break_ties' parameter for the 'SVC' is not implemented yet!" }; + } + if (args.contains("random_state")) { + throw py::value_error{ "The 'random_state' parameter for the 'SVC' is not implemented yet!" 
}; + } + return self; }, py::return_value_policy::reference, "Set the parameters of this estimator.") .def("set_score_request", [](const svc &) { throw py::attribute_error{ "'SVC' object has no function 'set_score_request' (not implemented)" }; }, "Request metadata passed to the score method.") .def("__sklearn_is_fitted__", [](const svc &self) -> bool { return self.model_ != nullptr; }, "Return True if the estimator is fitted, False otherwise.") .def("__sklearn_clone__", [](const svc &self) -> svc { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); // create a new SVC instance svc new_svc{}; // copy the parameters @@ -705,5 +723,5 @@ void init_sklearn_svc(py::module_ &m) { } } - return fmt::format("plssvm.SVC({})", fmt::join(non_default_values, ", ")); }); + return fmt::format("plssvm.svm.SVC({})", fmt::join(non_default_values, ", ")); }, "Print the SVC showing all non-default parameters."); } diff --git a/bindings/Python/sklearn_svr.cpp b/bindings/Python/sklearn_like/svr.cpp similarity index 65% rename from bindings/Python/sklearn_svr.cpp rename to bindings/Python/sklearn_like/svr.cpp index 9256b49a0..0ebf68217 100644 --- a/bindings/Python/sklearn_svr.cpp +++ b/bindings/Python/sklearn_like/svr.cpp @@ -23,12 +23,15 @@ #include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper #include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix #include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_gamma_kwarg_to_variant, vector_to_pyarray} +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, 
vector_to_pyarray} #include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/cast.h" // py::cast #include "pybind11/numpy.h" // support for STL types #include "pybind11/operators.h" // support for operators #include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error +#include "pybind11/pytypes.h" // py::dict, py::kwargs, py::str #include "pybind11/stl.h" // support for STL types #include // std::int32_t @@ -44,6 +47,18 @@ namespace py = pybind11; // TODO: implement missing functionality (as far es possible) +/* + * Currently missing: + * - shrinking constructor parameter (makes no sense for LS-SVMs) + * - cache_size constructor parameter (not applicable in PLSSVM) + * - epsilon constructor parameter (not applicable in PLSSVM since we implement a C-SVR and not an epsilon-SVR) + * - dual_coef_ attribute + * - get_metadata_routing function (no idea how to implement this function) + * - set_fit_request function (no idea how to implement this function) + * - set_score_request function (no idea how to implement this function) + * - sample_weight parameter for the fit function + * - sample_weight parameter for the score function + */ // dummy struct svr { @@ -51,6 +66,23 @@ struct svr { using possible_data_set_types = typename plssvm::bindings::python::util::regression_data_set_wrapper::possible_data_set_types; using possible_model_types = typename plssvm::bindings::python::util::regression_model_wrapper::possible_model_types; + /** + * @brief Construct a default svr wrapper doing nothing. + */ + svr() : + svm_{ plssvm::make_csvr(plssvm::gamma = plssvm::gamma_coefficient_type::scale) } { } + + /** + * @brief Construct a new svr wrapper with the provided parameters. 
+ * @param[in] params the SVM hyper-parameters + * @param[in] epsilon the epsilon value for the CG termination criterion + * @param[in] max_iter the maximum number of CG iterations + */ + svr(const plssvm::parameter params, const plssvm::real_type epsilon, const std::optional max_iter) : + svm_{ plssvm::make_csvr(params) }, + epsilon_{ epsilon }, + max_iter_{ max_iter } { } + /** * @brief Get the w values used for the coef_ attribute from the currently learned linear model. * @return the w values (`[[nodiscard]]`) @@ -73,190 +105,90 @@ struct svr { // fill a Python dictionary with the supported keys and values py::dict py_params; py_params["C"] = params.cost; - py_params["cache_size"] = 0; + // py_params["epsilon"] = 0.1; + // py_params["cache_size"] = 0; py_params["coef0"] = params.coef0; py_params["degree"] = params.degree; if (std::holds_alternative(params.gamma)) { py_params["gamma"] = std::get(params.gamma); } else { - switch (std::get(params.gamma)) { - case plssvm::gamma_coefficient_type::automatic: - py_params["gamma"] = "auto"; - break; - case plssvm::gamma_coefficient_type::scale: - py_params["gamma"] = "scale"; - break; - } + // can't use this for both or the numeric value would also be interpreted as a string like '0.001' + py_params["gamma"] = fmt::format("{}", params.gamma); } py_params["kernel"] = fmt::format("{}", params.kernel_type); py_params["max_iter"] = max_iter_.has_value() ? static_cast(max_iter_.value()) : -1; - py_params["shrinking"] = false; - py_params["tol"] = epsilon_.value_or(plssvm::real_type{ 1e-10 }); + // py_params["shrinking"] = false; + py_params["tol"] = epsilon_; py_params["verbose"] = plssvm::verbosity != plssvm::verbosity_level::quiet; return py_params; } - py::dtype py_dtype_{}; - std::optional epsilon_{}; + /// Pointer to the the stored PLSSVM C-SVR instance. + std::unique_ptr svm_{}; + /// The CG termination criterion if provided. + plssvm::real_type epsilon_{}; + /// The maximum number of CG iterations if provided. 
std::optional max_iter_{}; - std::unique_ptr svm_ = plssvm::make_csvr(plssvm::gamma = plssvm::gamma_coefficient_type::scale); + /// The data type of the labels. + py::dtype py_dtype_{}; + /// Pointer to the regression data set wrapper (represents data sets with all possible label types). std::unique_ptr data_{}; + /// Pointer to the regression model wrapper (represents models with all possible label types). std::unique_ptr model_{}; + /// The name of the features. Can only be provided via a Pandas DataFrame. std::optional> feature_names_{}; }; -namespace { - -void parse_provided_kwargs(svr &self, const py::kwargs &args) { - // check keyword arguments - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "tol", "cache_size", "verbose", "max_iter", "epsilon" }); - - if (args.contains("C")) { - self.svm_->set_params(plssvm::cost = args["C"].cast()); - } - if (args.contains("kernel")) { - const auto kernel_str = args["kernel"].cast(); - plssvm::kernel_function_type kernel{}; - if (kernel_str == "linear") { - kernel = plssvm::kernel_function_type::linear; - } else if (kernel_str == "poly" || kernel_str == "polynomial") { - kernel = plssvm::kernel_function_type::polynomial; - } else if (kernel_str == "rbf") { - kernel = plssvm::kernel_function_type::rbf; - } else if (kernel_str == "sigmoid") { - kernel = plssvm::kernel_function_type::sigmoid; - } else if (kernel_str == "laplacian") { - kernel = plssvm::kernel_function_type::laplacian; - } else if (kernel_str == "chi_squared" || kernel_str == "chi-squared") { - kernel = plssvm::kernel_function_type::chi_squared; - } else if (kernel_str == "precomputed") { - throw py::value_error{ R"(The "kernel = 'precomputed'" parameter for the 'SVR' is not implemented yet!)" }; - } else { - throw py::value_error{ fmt::format("'{}' is not in list", kernel_str) }; - } - self.svm_->set_params(plssvm::kernel_type = kernel); - } - if (args.contains("degree")) { - 
self.svm_->set_params(plssvm::degree = args["degree"].cast()); - } - if (args.contains("gamma")) { - const plssvm::gamma_type gamma = plssvm::bindings::python::util::convert_gamma_kwarg_to_variant(args); - if (std::holds_alternative(gamma)) { - self.svm_->set_params(plssvm::gamma = std::get(gamma)); - } else { - self.svm_->set_params(plssvm::gamma = std::get(gamma)); - } - } - if (args.contains("coef0")) { - self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); - } - if (args.contains("shrinking")) { - throw py::value_error{ "The 'shrinking' parameter for the 'SVR' is not implemented yet!" }; - } - if (args.contains("tol")) { - self.epsilon_ = args["tol"].cast(); - } - if (args.contains("cache_size")) { - throw py::value_error{ "The 'cache_size' parameter for the 'SVR' is not implemented yet!" }; - } - if (args.contains("verbose")) { - if (args["verbose"].cast()) { - if (plssvm::verbosity == plssvm::verbosity_level::quiet) { - // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output - plssvm::verbosity = plssvm::verbosity_level::full; - } - // otherwise: use currently active verbosity level - } else { - plssvm::verbosity = plssvm::verbosity_level::quiet; - } - } - if (args.contains("max_iter")) { - const auto max_iter = args["max_iter"].cast(); - if (max_iter > 0) { - // use provided value - self.max_iter_ = static_cast(max_iter); - } else if (max_iter == -1) { - // default behavior in PLSSVM -> do nothing - } else { - // invalid max_iter provided - throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; - } - } - if (args.contains("epsilon")) { - throw py::value_error{ "The 'epsilon' parameter for the 'SVR' is not implemented yet!" 
}; - } -} - -void fit(svr &self) { - // perform sanity checks - if (self.svm_->get_params().cost <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ "C <= 0" }; - } - if (self.svm_->get_params().degree < 0) { - throw py::value_error{ "degree of polynomial kernel < 0" }; - } - if (self.epsilon_.has_value() && self.epsilon_.value() <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ "eps <= 0" }; - } - - // fit the model using potentially provided keyword arguments - std::visit([&](auto &&data) { - using possible_model_types = typename svr::possible_model_types; - - if (self.epsilon_.has_value() && self.max_iter_.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(data, - plssvm::epsilon = self.epsilon_.value(), - plssvm::max_iter = self.max_iter_.value())); - } else if (self.epsilon_.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(data, - plssvm::epsilon = self.epsilon_.value())); - } else if (self.max_iter_.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(data, - plssvm::max_iter = self.max_iter_.value())); - } else { - self.model_ = std::make_unique(self.svm_->fit(data)); - } - }, - *self.data_); -} - -} // namespace - void init_sklearn_svr(py::module_ &m) { // documentation based on sklearn.svm.SVR documentation py::class_ py_svr(m, "SVR", py::dynamic_attr(), "A C-SVR implementation adhering to sklearn.svm.SVR using PLSSVM as backend."); - py_svr.def(py::init([](const py::kwargs &args) { - // to silence constructor messages - if (args.contains("verbose")) { - if (args["verbose"].cast()) { - if (plssvm::verbosity == plssvm::verbosity_level::quiet) { - // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output - plssvm::verbosity = plssvm::verbosity_level::full; - } - // otherwise: use currently active verbosity level - } else { - plssvm::verbosity = plssvm::verbosity_level::quiet; + py_svr.def(py::init([](const plssvm::kernel_function_type kernel, const int 
degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type tol, const plssvm::real_type C, const bool verbose, const long long max_iter) { + // sanity check parameters + if (max_iter < -1) { + throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; + } + + // set verbosity + if (verbose) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; } + // otherwise: use currently active verbosity level } else { - // sklearn default is quiet plssvm::verbosity = plssvm::verbosity_level::quiet; } - // create SVR class - auto self = std::make_unique(); - parse_provided_kwargs(*self, args); - return self; + // create plssvm::parameter struct + const plssvm::parameter params{ kernel, degree, gamma, coef0, C }; + // we use an unsigned type for max_iter -> convert it to an optional to support -1 + const std::optional used_max_iter = max_iter == -1 ? 
std::nullopt : std::make_optional(static_cast(max_iter)); + // create SVC wrapper + return svr{ params, tol, used_max_iter }; }), - "Construct a new SVR classifier."); + "Construct a new SVC classifier.", + py::kw_only(), + py::arg("kernel") = plssvm::kernel_function_type::rbf, + py::arg("degree") = 3, + py::arg("gamma") = plssvm::gamma_coefficient_type::scale, + py::arg("coef0") = 0.0, + py::arg("tol") = 1e-10, + py::arg("C") = 1.0, + // py::arg("epsilon") = 0.1, + // py::arg("shrinking") = true, // true + // py::arg("cache_size") = 200, // 200 + py::arg("verbose") = false, + py::arg("max_iter") = -1); //*************************************************************************************************************************************// // ATTRIBUTES // //*************************************************************************************************************************************// py_svr .def_property_readonly("coef_", [](const svr &self) -> py::array { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "'SVr' object has no attribute 'coef_'" }; } @@ -284,7 +216,7 @@ void init_sklearn_svr(py::module_ &m) { } return static_cast(std::visit([](auto &&data) { return data.num_features(); }, *self.data_)); }, "Number of features seen during fit. int") - .def_property_readonly("feature_names_in_", [](const svr &self) { + .def_property_readonly("feature_names_in_", [](const svr &self) -> py::array { if (!self.feature_names_.has_value()) { throw py::attribute_error{ "'SVR' object has no attribute 'feature_names_in_'" }; } @@ -319,6 +251,7 @@ void init_sklearn_svr(py::module_ &m) { return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(std::vector{ static_cast(model.num_support_vectors()) }); }, *self.model_); }, "Number of support vectors for each class. 
ndarray of shape (1,), dtype=int32") .def_property_readonly("shape_fit_", [](const svr &self) { + PLSSVM_ASSERT(self.data_ != nullptr, "data_ may not be a nullptr! Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "'SVR' object has no attribute 'shape_fit_'" }; } @@ -331,6 +264,7 @@ void init_sklearn_svr(py::module_ &m) { //*************************************************************************************************************************************// py_svr .def("fit", [](svr &self, plssvm::bindings::python::util::soa_matrix_wrapper data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> svr & { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); if (sample_weight.has_value()) { throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; } @@ -346,17 +280,30 @@ void init_sklearn_svr(py::module_ &m) { // get the label type and possible data set types using label_type = typename plssvm::detail::remove_cvref_t::value_type; using possible_data_set_types = typename svr::possible_data_set_types; + using possible_model_types = typename svr::possible_model_types; + // create the data set to fit - self.data_ = std::make_unique(plssvm::regression_data_set(std::move(data.matrix), std::move(labels_vector))); + plssvm::regression_data_set train_data{ std::move(data.matrix), std::move(labels_vector) }; + + // fit the model using potentially provided keyword arguments + if (self.max_iter_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(train_data, + plssvm::epsilon = self.epsilon_, + plssvm::max_iter = self.max_iter_.value())); + } else { + self.model_ = std::make_unique(self.svm_->fit(train_data, plssvm::epsilon = self.epsilon_)); + } + + // store data set internally + self.data_ = std::make_unique(std::move(train_data)); }, - labels.labels); + labels.labels); - // 
fit the model using potentially provided keyword arguments - fit(self); - return self; }, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt, py::return_value_policy::reference) + return self; }, py::return_value_policy::reference, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) .def("get_metadata_routing", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'get_metadata_routing' (not implemented)" }; }, "Get metadata routing of this object.") .def("get_params", &svr::get_params, "Get parameters for this estimator.", py::arg("deep") = true) .def("predict", [](svr &self, plssvm::soa_matrix data) -> py::array { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); if (self.model_ == nullptr) { throw py::attribute_error{ "This SVR instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; } @@ -368,8 +315,9 @@ void init_sklearn_svr(py::module_ &m) { const plssvm::regression_data_set data_to_predict{ std::move(data) }; // predict the data return plssvm::bindings::python::util::vector_to_pyarray(self.svm_->predict(model, data_to_predict)); - }, *self.model_); }, "Perform classification on samples in X.") + }, *self.model_); }, "Perform classification on samples in X.", py::arg("X")) .def("score", [](svr &self, plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> plssvm::real_type { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); if (sample_weight.has_value()) { throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" 
}; } @@ -392,11 +340,65 @@ void init_sklearn_svr(py::module_ &m) { }, labels.labels); }, "Return the mean accuracy on the given test data and labels.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) .def("set_fit_request", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'set_fit_request' (not implemented)" }; }, "Request metadata passed to the fit method.") .def("set_params", [](svr &self, const py::kwargs &args) -> svr & { - parse_provided_kwargs(self, args); - return self; }, "Set the parameters of this estimator.", py::return_value_policy::reference) + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "tol", "cache_size", "verbose", "max_iter", "epsilon" }); + + if (args.contains("kernel")) { + self.svm_->set_params(plssvm::kernel_type = args["kernel"].cast()); + } + if (args.contains("degree")) { + self.svm_->set_params(plssvm::degree = args["degree"].cast()); + } + if (args.contains("gamma")) { + self.svm_->set_params(plssvm::gamma = args["gamma"].cast()); + } + if (args.contains("coef0")) { + self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); + } + if (args.contains("tol")) { + self.epsilon_ = args["tol"].cast(); + } + if (args.contains("C")) { + self.svm_->set_params(plssvm::cost = args["C"].cast()); + } + if (args.contains("epsilon")) { + throw py::value_error{ "The 'epsilon' parameter for the 'SVR' is not implemented yet!" }; + } + if (args.contains("shrinking")) { + throw py::value_error{ "The 'shrinking' parameter for the 'SVR' is not implemented yet!" }; + } + if (args.contains("cache_size")) { + throw py::value_error{ "The 'cache_size' parameter for the 'SVR' is not implemented yet!" 
}; + } + if (args.contains("verbose")) { + if (args["verbose"].cast()) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; + } + // otherwise: use currently active verbosity level + } else { + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + } + if (args.contains("max_iter")) { + const auto max_iter = args["max_iter"].cast(); + if (max_iter > 0) { + // use provided value + self.max_iter_ = static_cast(max_iter); + } else if (max_iter == -1) { + // default behavior in PLSSVM -> do nothing + } else { + // invalid max_iter provided + throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; + } + } + return self; }, py::return_value_policy::reference, "Set the parameters of this estimator.") .def("set_score_request", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'set_score_request' (not implemented)" }; }, "Request metadata passed to the score method.") .def("__sklearn_is_fitted__", [](const svr &self) -> bool { return self.model_ != nullptr; }, "Return True if the estimator is fitted, False otherwise.") .def("__sklearn_clone__", [](const svr &self) -> svr { + PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! 
Maybe you forgot to initialize it?"); // create a new SVR instance svr new_svr{}; // copy the parameters @@ -430,5 +432,5 @@ void init_sklearn_svr(py::module_ &m) { } } - return fmt::format("plssvm.SVR({})", fmt::join(non_default_values, ", ")); }, "Print the SVR showing all non-default parameters."); + return fmt::format("plssvm.svm.SVR({})", fmt::join(non_default_values, ", ")); }, "Print the SVR showing all non-default parameters."); } diff --git a/bindings/Python/solver_types.cpp b/bindings/Python/solver_types.cpp index 1c568c238..f8309fb4b 100644 --- a/bindings/Python/solver_types.cpp +++ b/bindings/Python/solver_types.cpp @@ -8,14 +8,20 @@ #include "plssvm/solver_types.hpp" // plssvm::solver_type +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + #include "pybind11/pybind11.h" // py::module_, py::enum_ namespace py = pybind11; void init_solver_types(py::module_ &m) { // bind enum class - py::enum_(m, "SolverType", "Enum class for all possible solver types implemented in PLSSVM.") + py::enum_ py_enum(m, "SolverType", "Enum class for all possible solver types implemented in PLSSVM."); + py_enum .value("AUTOMATIC", plssvm::solver_type::automatic, "the default solver type; depends on the available device and system memory") .value("CG_EXPLICIT", plssvm::solver_type::cg_explicit, "explicitly assemble the kernel matrix on the device") .value("CG_IMPLICIT", plssvm::solver_type::cg_implicit, "implicitly calculate the kernel matrix entries in each CG iteration"); + + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); } diff --git a/bindings/Python/svm/csvc.cpp b/bindings/Python/svm/csvc.cpp index af8bc16e6..1e9bd6c58 100644 --- a/bindings/Python/svm/csvc.cpp +++ b/bindings/Python/svm/csvc.cpp @@ -8,73 +8,77 @@ #include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/backend_types.hpp" // plssvm::backend_type #include 
"plssvm/classification_types.hpp" // plssvm::classification_type #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/gamma.hpp" // plssvm::gamma_type +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/model/classification_model.hpp" // plssvm::classification_model -#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments #include "plssvm/solver_types.hpp" // plssvm::solver_type +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper -#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper -#include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, python_type_name_mapping, vector_to_pyarray} +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper +#include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, 
py::init, py::kwargs, py::value_error -#include "pybind11/stl.h" // support for STL types: std::vector +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::kwargs, py::value_error +#include "pybind11/stl.h" // support for STL types: std::optional #include // std::exception +#include // std::optional, std::nullopt #include // std::string_view +#include // std::move #include // std::visit, std::get namespace py = pybind11; -void init_csvc(py::module_ &m, py::module_ &pure_virtual) { +void init_csvc(py::module_ &m) { using plssvm::bindings::python::util::classification_data_set_wrapper; using plssvm::bindings::python::util::classification_model_wrapper; - const py::class_ py_csvc(pure_virtual, "__pure_virtual_base_CSVC"); + // the default parameters used + const plssvm::parameter default_params{}; // bind plssvm::make_csvm factory functions to "generic" Python C-SVC class - py::class_(m, "CSVC", py_csvc, py::module_local(), "Base class for all backend C-SVC implementations.") + py::class_(m, "CSVC", "Base class for all backend C-SVC implementations.") // IMPLICIT BACKEND - .def(py::init([](const py::kwargs &args) { - return plssvm::bindings::python::util::assemble_csvm(args); + .def(py::init([](const plssvm::backend_type backend, const plssvm::target_platform target, const plssvm::parameter ¶ms, plssvm::mpi::communicator comm, const py::kwargs &optional_args) { + return plssvm::bindings::python::util::assemble_csvm(backend, target, params, std::move(comm), optional_args); }), - "create an C-SVC with the provided keyword arguments") - .def(py::init([](const plssvm::parameter ¶ms, const py::kwargs &args) { - return plssvm::bindings::python::util::assemble_csvm(args, params); + "create an C-SVC with the provided SVM parameter encapsulated in a plssvm.Parameter", + py::arg("backend") = plssvm::backend_type::automatic, + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, 
+ py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::backend_type backend, const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm, const py::kwargs &optional_args) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return plssvm::bindings::python::util::assemble_csvm(backend, target, params, std::move(comm), optional_args); }), - "create an C-SVC with the provided parameters and keyword arguments; the values in params will be overwritten by the keyword arguments") + "create an C-SVC with the provided SVM parameter as separate keyword arguments", + py::arg("backend") = plssvm::backend_type::automatic, + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) // clang-format off - .def("fit", [](const plssvm::csvc &self, const classification_data_set_wrapper &data_set, const py::kwargs &args) -> classification_model_wrapper { + .def("fit", [](const plssvm::csvc &self, const classification_data_set_wrapper &data_set, const plssvm::real_type epsilon, const std::optional max_iter, const plssvm::classification_type classification, const plssvm::solver_type solver) -> classification_model_wrapper { return std::visit([&](auto &&data) { - // check keyword arguments - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "epsilon", "max_iter", "classification", "solver" }); - - auto epsilon{ plssvm::real_type{ 1e-10 } }; - if (args.contains("epsilon")) { - epsilon = args["epsilon"].cast(); - } - - // can't do it with max_iter due to OAO 
splitting the data set - - plssvm::classification_type classification{ plssvm::classification_type::oaa }; - if (args.contains("classification")) { - classification = args["classification"].cast(); - } - - plssvm::solver_type solver{ plssvm::solver_type::automatic }; - if (args.contains("solver")) { - solver = args["solver"].cast(); - } - - if (args.contains("max_iter")) { + if (max_iter.has_value()) { return classification_model_wrapper{ self.fit(data, plssvm::epsilon = epsilon, - plssvm::max_iter = args["max_iter"].cast(), + plssvm::max_iter = max_iter.value(), plssvm::classification = classification, plssvm::solver = solver) }; } else { @@ -83,7 +87,13 @@ void init_csvc(py::module_ &m, py::module_ &pure_virtual) { plssvm::classification = classification, plssvm::solver = solver) }; } - }, data_set.data_set); }, "fit a model using the current C-SVC on the provided data") + }, data_set.data_set); }, "fit a model using the current C-SVC on the provided data", + py::arg("data"), + py::kw_only(), + py::arg("epsilon") = plssvm::real_type{ 1e-10 }, + py::arg("max_iter") = std::nullopt, + py::arg("classification") = plssvm::classification_type::oaa, + py::arg("solver") = plssvm::solver_type::automatic) .def("predict", [](const plssvm::csvc &self, const classification_model_wrapper &trained_model, const classification_data_set_wrapper &data_set) { return std::visit([&](auto &&model) { using label_type = typename plssvm::detail::remove_cvref_t::label_type; @@ -96,11 +106,11 @@ void init_csvc(py::module_ &m, py::module_ &pure_virtual) { }, data_set.data_set); throw py::value_error{ fmt::format("Mismatching label types! 
Trained the model with {}, but tried to predict it with {}.", python_type_name_mapping(), data_set_label_type) }; } - }, trained_model.model); }, "predict the labels for a data set using a previously learned model") + }, trained_model.model); }, "predict the labels for a data set using a previously learned model", py::arg("model"), py::arg("data")) .def("score", [](const plssvm::csvc &self, const classification_model_wrapper &trained_model) { return std::visit([&](auto &&model) { return self.score(model); - }, trained_model.model); }, "calculate the accuracy of the model") + }, trained_model.model); }, "calculate the accuracy of the model", py::arg("model")) .def("score", [](const plssvm::csvc &self, const classification_model_wrapper &trained_model, const classification_data_set_wrapper &data_set) { return std::visit([&](auto &&model) { using label_type = typename plssvm::detail::remove_cvref_t::label_type; @@ -113,6 +123,6 @@ void init_csvc(py::module_ &m, py::module_ &pure_virtual) { }, data_set.data_set); throw py::value_error{ fmt::format("Mismatching label types! 
Trained the model with {}, but tried to score it with {}.", python_type_name_mapping(), data_set_label_type) }; } - }, trained_model.model); }, "calculate the accuracy of the model"); + }, trained_model.model); }, "calculate the accuracy of the model", py::arg("model"), py::arg("data")); // clang-format on } diff --git a/bindings/Python/svm/csvm.cpp b/bindings/Python/svm/csvm.cpp index 88494ffef..78a1d4fb9 100644 --- a/bindings/Python/svm/csvm.cpp +++ b/bindings/Python/svm/csvm.cpp @@ -8,23 +8,42 @@ #include "plssvm/svm/csvm.hpp" // plssvm::csvm -#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/gamma.hpp" +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter} +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::check_kwargs_for_correctness -#include "pybind11/pybind11.h" // py::module_, py::class_, py::kwargs +#include "pybind11/pybind11.h" // py::module_, py::class_, py::arg, py::kwargs, py::module_local +#include "pybind11/stl.h" // support for STL types: std::variant namespace py = pybind11; -void init_csvm(py::module_ &pure_virtual) { - py::class_(pure_virtual, "__pure_virtual_base_CSVM", "Base class for all other C-SVC or C-SVR implementations.") - .def("get_params", &plssvm::csvm::get_params, "get the hyper-parameters used for this C-SVM") - .def("set_params", [](plssvm::csvm &self, const plssvm::parameter ¶ms) { self.set_params(params); }, "update the hyper-parameters used for this C-SVM using a plssvm.Parameter object") +void init_csvm(py::module_ &m) { + py::class_(m, "CSVM", py::module_local(), "Base class for all other C-SVC or C-SVR implementations.") + .def("get_params", &plssvm::csvm::get_params, 
py::return_value_policy::copy, "get the hyper-parameters used for this C-SVM") + .def("set_params", [](plssvm::csvm &self, const plssvm::parameter ¶ms) { self.set_params(params); }, "update the hyper-parameters used for this C-SVM using a plssvm.Parameter object", py::arg("params")) .def("set_params", [](plssvm::csvm &self, const py::kwargs &args) { // check keyword arguments plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // convert kwargs to parameter and update csvm internal parameter - self.set_params(plssvm::bindings::python::util::convert_kwargs_to_parameter(args, self.get_params())); }, "update the hyper-parameters used for this C-SVM using keyword arguments") + if (args.contains("kernel_type")) { + self.set_params(plssvm::kernel_type = args["kernel_type"].cast()); + } + if (args.contains("degree")) { + self.set_params(plssvm::degree = args["degree"].cast()); + } + if (args.contains("gamma")) { + self.set_params(plssvm::gamma = args["gamma"].cast()); + } + if (args.contains("coef0")) { + self.set_params(plssvm::coef0 = args["coef0"].cast()); + } + if (args.contains("cost")) { + self.set_params(plssvm::cost = args["cost"].cast()); + } }, "update the hyper-parameters used for this C-SVM using keyword arguments") .def("get_target_platform", &plssvm::csvm::get_target_platform, "get the actual target platform this C-SVM runs on") - .def("num_available_devices", &plssvm::csvm::num_available_devices, "get the number of available devices for the current C-SVM"); + .def("num_available_devices", &plssvm::csvm::num_available_devices, "get the number of available devices for the current C-SVM") + .def("communicator", &plssvm::csvm::communicator, "the associated MPI communicator"); } diff --git a/bindings/Python/svm/csvr.cpp b/bindings/Python/svm/csvr.cpp index f9e05378e..23dddb1c1 100644 --- a/bindings/Python/svm/csvr.cpp +++ b/bindings/Python/svm/csvr.cpp @@ -8,71 +8,88 @@ #include 
"plssvm/svm/csvr.hpp" // plssvm::csvr +#include "plssvm/backend_types.hpp" // plssvm::backend_type #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/gamma.hpp" // plssvm::gamma_type +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/model/regression_model.hpp" // plssvm::regression_model -#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments #include "plssvm/solver_types.hpp" // plssvm::solver_type +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper -#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper -#include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, python_type_name_mapping, vector_to_pyarray} +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper +#include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, 
py::init, py::kwargs, py::value_error -#include "pybind11/stl.h" // support for STL types: std::vector +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::kwargs, py::value_error +#include "pybind11/stl.h" // support for STL types: std::optional #include // std::exception +#include // std::optional, std::nullopt #include // std::string_view +#include // std::move #include // std::visit, std::get namespace py = pybind11; -void init_csvr(py::module_ &m, py::module_ &pure_virtual) { +void init_csvr(py::module_ &m) { using plssvm::bindings::python::util::regression_data_set_wrapper; using plssvm::bindings::python::util::regression_model_wrapper; - const py::class_ py_csvr(pure_virtual, "__pure_virtual_base_CSVR"); + // the default parameters used + const plssvm::parameter default_params{}; // bind plssvm::make_csvm factory functions to "generic" Python C-SVR class - py::class_(m, "CSVR", py_csvr, py::module_local(), "Base class for all backend C-SVR implementations.") + py::class_(m, "CSVR", "Base class for all backend C-SVR implementations.") // IMPLICIT BACKEND - .def(py::init([](const py::kwargs &args) { - return plssvm::bindings::python::util::assemble_csvm(args); + .def(py::init([](const plssvm::backend_type backend, const plssvm::target_platform target, const plssvm::parameter ¶ms, plssvm::mpi::communicator comm, const py::kwargs &optional_args) { + return plssvm::bindings::python::util::assemble_csvm(backend, target, params, std::move(comm), optional_args); }), - "create an C-SVR with the provided keyword arguments") - .def(py::init([](const plssvm::parameter ¶ms, const py::kwargs &args) { - return plssvm::bindings::python::util::assemble_csvm(args, params); + "create an C-SVR with the provided SVM parameter encapsulated in a plssvm.Parameter", + py::arg("backend") = plssvm::backend_type::automatic, + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("params") = default_params, + 
py::arg("comm") = plssvm::mpi::communicator{}) + .def(py::init([](const plssvm::backend_type backend, const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, plssvm::mpi::communicator comm, const py::kwargs &optional_args) { + const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; + return plssvm::bindings::python::util::assemble_csvm(backend, target, params, std::move(comm), optional_args); }), - "create an C-SVR with the provided parameters and keyword arguments; the values in params will be overwritten by the keyword arguments") + "create an C-SVR with the provided SVM parameter as separate keyword arguments", + py::arg("backend") = plssvm::backend_type::automatic, + py::arg("target") = plssvm::target_platform::automatic, + py::kw_only(), + py::arg("kernel_type") = default_params.kernel_type, + py::arg("degree") = default_params.degree, + py::arg("gamma") = default_params.gamma, + py::arg("coef0") = default_params.coef0, + py::arg("cost") = default_params.cost, + py::arg("comm") = plssvm::mpi::communicator{}) // clang-format off - .def("fit", [](const plssvm::csvr &self, const regression_data_set_wrapper &data_set, const py::kwargs &args) { + .def("fit", [](const plssvm::csvr &self, const regression_data_set_wrapper &data_set, const plssvm::real_type epsilon, const std::optional max_iter, const plssvm::solver_type solver) { return std::visit([&](auto &&data) { - // check keyword arguments - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "epsilon", "max_iter", "solver" }); - - auto epsilon{ plssvm::real_type{ 1e-10 } }; - if (args.contains("epsilon")) { - epsilon = args["epsilon"].cast(); - } - - plssvm::solver_type solver{ plssvm::solver_type::automatic }; - if (args.contains("solver")) { - solver = args["solver"].cast(); - } - - if (args.contains("max_iter")) { + if 
(max_iter.has_value()) { return regression_model_wrapper{ self.fit(data, - plssvm::epsilon = epsilon, - plssvm::max_iter = args["max_iter"].cast(), - plssvm::solver = solver) }; + plssvm::epsilon = epsilon, + plssvm::max_iter = max_iter.value(), + plssvm::solver = solver) }; } else { return regression_model_wrapper{ self.fit(data, - plssvm::epsilon = epsilon, - plssvm::solver = solver) }; + plssvm::epsilon = epsilon, + plssvm::solver = solver) }; } - }, data_set.data_set); }, "fit a model using the current C-SVR on the provided data") + }, data_set.data_set); }, "fit a model using the current C-SVR on the provided data", + py::arg("data"), + py::kw_only(), + py::arg("epsilon") = plssvm::real_type{ 1e-10 }, + py::arg("max_iter") = std::nullopt, + py::arg("solver") = plssvm::solver_type::automatic) .def("predict", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model, const regression_data_set_wrapper &data_set) { return std::visit([&](auto &&model) { using label_type = typename plssvm::detail::remove_cvref_t::label_type; @@ -85,11 +102,11 @@ void init_csvr(py::module_ &m, py::module_ &pure_virtual) { }, data_set.data_set); throw py::value_error{ fmt::format("Mismatching label types! 
Trained the model with {}, but tried to predict it with {}.", python_type_name_mapping(), data_set_label_type) }; } - }, trained_model.model); }, "predict the labels for a data set using a previously learned model") + }, trained_model.model); }, "predict the labels for a data set using a previously learned model", py::arg("model"), py::arg("data")) .def("score", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model) { return std::visit([&](auto &&model) { return self.score(model); - }, trained_model.model); }, "calculate the accuracy of the model") + }, trained_model.model); }, "calculate the accuracy of the model", py::arg("model")) .def("score", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model, const regression_data_set_wrapper &data_set) { return std::visit([&](auto &&model) { using label_type = typename plssvm::detail::remove_cvref_t::label_type; @@ -102,6 +119,6 @@ void init_csvr(py::module_ &m, py::module_ &pure_virtual) { }, data_set.data_set); throw py::value_error{ fmt::format("Mismatching label types! 
Trained the model with {}, but tried to score it with {}.", python_type_name_mapping(), data_set_label_type) }; } - }, trained_model.model); }, "calculate the accuracy of the model"); + }, trained_model.model); }, "calculate the accuracy of the model", py::arg("model"), py::arg("data")); // clang-format on } diff --git a/bindings/Python/svm/utility.hpp b/bindings/Python/svm/utility.hpp index 3cedd5cb6..38019bf8b 100644 --- a/bindings/Python/svm/utility.hpp +++ b/bindings/Python/svm/utility.hpp @@ -13,86 +13,63 @@ #define PLSSVM_BINDINGS_PYTHON_SVM_UTILITY_HPP_ #pragma once -#include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::determine_default_backend, plssvm::list_available_backends +#include "plssvm/backend_types.hpp" // plssvm::backend_type #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/csvm_factory.hpp" // plssvm::make_csvm -#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters -#include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::determine_default_target_platform, plssvm::list_available_target_platforms +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter} +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::check_kwargs_for_correctness -#include "pybind11/pybind11.h" // py::kwargs, py::instance, py::str, py::value_error +#include "pybind11/pybind11.h" // py::kwargs #include // std::unique_ptr -#include // std::istringstream -#include // std::string 
+#include // std::move namespace py = pybind11; namespace plssvm::bindings::python::util { /** - * @brief Assemble a C-SVM (C-SVC or C-SVR based on the template parameter @p csvm_type) using the named Python arguments @p args and PLSSVM parameters @p input_params. + * @brief Assemble a C-SVM (C-SVC or C-SVR based on the template parameter @p csvm_type) using the provided parameters. * @tparam csvm_type the type of the C-SVM to create - * @param[in] args the named Python arguments - * @param[in] input_params the PLSSVM parameter + * @param[in] backend the C-SVM backend to instantiate + * @param[in] target the target platform to run on + * @param[in] params the SVM hyper-parameter used to train the C-SVM + * @param[in] comm the MPI communicator + * @param[in] optional_args optional arguments used by some C-SVMs * @return the created C-SVM (`[[nodiscard]]`) */ template -[[nodiscard]] inline std::unique_ptr assemble_csvm(const py::kwargs &args, plssvm::parameter input_params = {}) { +[[nodiscard]] inline std::unique_ptr assemble_csvm(const plssvm::backend_type backend, const plssvm::target_platform target, const plssvm::parameter ¶ms, plssvm::mpi::communicator comm, const py::kwargs &optional_args) { // check keyword arguments - plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "backend", "target_platform", "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_implementation_type", "sycl_kernel_invocation_type", "kokkos_execution_space" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args, input_params); - plssvm::backend_type backend = plssvm::determine_default_backend(); - if (args.contains("backend")) { - if (py::isinstance(args["backend"])) { - std::istringstream iss{ args["backend"].cast() }; - iss >> backend; - if (iss.fail()) { - throw py::value_error{ fmt::format("Available backends are \"{}\", got {}!", 
fmt::join(plssvm::list_available_backends(), ";"), args["backend"].cast()) }; - } - } else { - backend = args["backend"].cast(); - } - } - plssvm::target_platform target = plssvm::determine_default_target_platform(); - if (args.contains("target_platform")) { - if (py::isinstance(args["target_platform"])) { - std::istringstream iss{ args["target_platform"].cast() }; - iss >> target; - if (iss.fail()) { - throw py::value_error{ fmt::format("Available target platforms are \"{}\", got {}!", fmt::join(plssvm::list_available_target_platforms(), ";"), args["target_platform"].cast()) }; - } - } else { - target = args["target_platform"].cast(); - } - } + plssvm::bindings::python::util::check_kwargs_for_correctness(optional_args, { "foo", "sycl_implementation_type", "sycl_kernel_invocation_type", "kokkos_execution_space" }); if (backend == plssvm::backend_type::sycl) { // parse SYCL specific keyword arguments plssvm::sycl::implementation_type impl_type = plssvm::sycl::implementation_type::automatic; - if (args.contains("sycl_implementation_type")) { - impl_type = args["sycl_implementation_type"].cast(); + if (optional_args.contains("sycl_implementation_type")) { + impl_type = optional_args["sycl_implementation_type"].cast(); } plssvm::sycl::kernel_invocation_type invocation_type = plssvm::sycl::kernel_invocation_type::automatic; - if (args.contains("sycl_kernel_invocation_type")) { - invocation_type = args["sycl_kernel_invocation_type"].cast(); + if (optional_args.contains("sycl_kernel_invocation_type")) { + invocation_type = optional_args["sycl_kernel_invocation_type"].cast(); } - return plssvm::make_csvm(backend, target, params, plssvm::sycl_implementation_type = impl_type, plssvm::sycl_kernel_invocation_type = invocation_type); + return plssvm::make_csvm(backend, std::move(comm), target, params, plssvm::sycl_implementation_type = impl_type, plssvm::sycl_kernel_invocation_type = invocation_type); } else if (backend == plssvm::backend_type::kokkos) { // parse Kokkos 
specific keyword arguments plssvm::kokkos::execution_space space = plssvm::kokkos::execution_space::automatic; - if (args.contains("kokkos_execution_space")) { - space = args["kokkos_execution_space"].cast(); + if (optional_args.contains("kokkos_execution_space")) { + space = optional_args["kokkos_execution_space"].cast(); } - return plssvm::make_csvm(backend, target, params, plssvm::kokkos_execution_space = space); + return plssvm::make_csvm(backend, std::move(comm), target, params, plssvm::kokkos_execution_space = space); } else { - return plssvm::make_csvm(backend, target, params); + return plssvm::make_csvm(backend, std::move(comm), target, params); } } diff --git a/bindings/Python/svm_types.cpp b/bindings/Python/svm_types.cpp index 8bf4eaee6..793354c12 100644 --- a/bindings/Python/svm_types.cpp +++ b/bindings/Python/svm_types.cpp @@ -8,18 +8,24 @@ #include "plssvm/svm_types.hpp" // plssvm::svm_type, plssvm::list_available_svm_types, plssvm::svm_type_from_model_file -#include "pybind11/pybind11.h" // py::module_ +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + +#include "pybind11/pybind11.h" // py::module_, py::enum_ namespace py = pybind11; void init_svm_types(py::module_ &m) { // bind enum class - py::enum_(m, "SVMType", "Enum class for all implemented SVM types in PLSSVM.") + py::enum_ py_enum(m, "SVMType", "Enum class for all implemented SVM types in PLSSVM."); + py_enum .value("CSVC", plssvm::svm_type::csvc, "use a C-SVC for classification") .value("CSVR", plssvm::svm_type::csvr, "use a C-SVR for classification"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // bind free functions m.def("list_available_svm_types", &plssvm::list_available_svm_types, "list the available SVM types"); - m.def("svm_type_to_task_name", &plssvm::svm_type_to_task_name, "get the task name (e.g., \"classification\" or 
\"regression\") based on the provided SVMType"); - m.def("svm_type_from_model_file", &plssvm::svm_type_from_model_file, "determine the SVMType based on the provided LIBSVM model file"); + m.def("svm_type_to_task_name", &plssvm::svm_type_to_task_name, "get the task name (e.g., \"classification\" or \"regression\") based on the provided SVMType", py::arg("svm_type")); + m.def("svm_type_from_model_file", &plssvm::svm_type_from_model_file, "determine the SVMType based on the provided LIBSVM model file", py::arg("filename")); } diff --git a/bindings/Python/target_platforms.cpp b/bindings/Python/target_platforms.cpp index 36804bf84..e47be725b 100644 --- a/bindings/Python/target_platforms.cpp +++ b/bindings/Python/target_platforms.cpp @@ -8,6 +8,8 @@ #include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::list_available_target_platforms, plssvm::determine_default_target_platform +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + #include "pybind11/pybind11.h" // py::module_, py::enum_ #include "pybind11/stl.h" // support for STL types: std::vector @@ -15,13 +17,17 @@ namespace py = pybind11; void init_target_platforms(py::module_ &m) { // bind enum class - py::enum_(m, "TargetPlatform", "Enum class for all possible targets that PLSSVM supports.") + py::enum_ py_enum(m, "TargetPlatform", "Enum class for all possible targets that PLSSVM supports."); + py_enum .value("AUTOMATIC", plssvm::target_platform::automatic, "the default target with respect to the used backend type; checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs") .value("CPU", plssvm::target_platform::cpu, "target CPUs only (Intel, AMD, IBM, ...)") .value("GPU_NVIDIA", plssvm::target_platform::gpu_nvidia, "target GPUs from NVIDIA") .value("GPU_AMD", plssvm::target_platform::gpu_amd, "target GPUs from AMD") .value("GPU_INTEL", plssvm::target_platform::gpu_intel, "target GPUs from 
Intel"); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // bind free functions m.def("list_available_target_platforms", &plssvm::list_available_target_platforms, "list the available target platforms (as defined during CMake configuration)"); m.def("determine_default_target_platform", &plssvm::determine_default_target_platform, "determine the default target platform given the list of available target platforms", py::arg("platform_device_list") = plssvm::list_available_target_platforms()); diff --git a/bindings/Python/type_caster/label_vector_wrapper_caster.hpp b/bindings/Python/type_caster/label_vector_wrapper_caster.hpp index 0cbe37fa5..becd60845 100644 --- a/bindings/Python/type_caster/label_vector_wrapper_caster.hpp +++ b/bindings/Python/type_caster/label_vector_wrapper_caster.hpp @@ -59,7 +59,7 @@ struct py_type_equal { /** * @brief Convert a Python Numpy array to a `std::vector`. * @tparam T the type in the array - * @param[in] vec the Python Numpy array to convert + * @param[in] arr the Python Numpy array to convert * @return the `std::vector` (`[[nodiscard]]`) */ template @@ -92,7 +92,7 @@ template /** * @brief Convert a generic Python Numpy array to a `std::vector`. - * @param[in] vec the generic Python Numpy array to convert + * @param[in] arr the generic Python Numpy array to convert * @return a `std::variant` containing the converted `std::vector` (`[[nodiscard]]`) */ template @@ -272,8 +272,7 @@ namespace pybind11::detail { /** * @brief A custom Pybind11 type caster to convert Python object from and to a plssvm::bindings::python::util::label_vector_wrapper. 
- * @tparam T the value type of the PLSSVM matrix - * @tparam layout the memory layout type of the PLSSVM matrix + * @tparam PossibleTypes the possible label vector types */ template struct type_caster> { @@ -289,7 +288,7 @@ struct type_caster(obj)) { // provided obj is a Python list auto [labels, dtype] = plssvm::bindings::python::util::generic_pylist_to_vector(py::cast(obj)); diff --git a/bindings/Python/type_caster/matrix_type_caster.hpp b/bindings/Python/type_caster/matrix_type_caster.hpp index 0b664d6ec..0231beb76 100644 --- a/bindings/Python/type_caster/matrix_type_caster.hpp +++ b/bindings/Python/type_caster/matrix_type_caster.hpp @@ -57,7 +57,7 @@ struct type_caster> { * @param[in] matr the PLSSVM matrix to convert to a Numpy ndarray * @return a Pybind11 handle to the Numpy ndarray */ - static handle cast(const matrix_type &matr, return_value_policy, handle) { + static py::handle cast(const matrix_type &matr, py::return_value_policy, py::handle) { const std::size_t num_data_points = matr.num_rows(); const std::size_t num_features = matr.num_cols(); @@ -102,13 +102,13 @@ struct type_caster> { const std::size_t num_rows = arr.shape(0); const std::size_t num_cols = arr.shape(1); - // note: the conversions use OpenMP -> remove Python's Global Interpreter Lock - const py::gil_scoped_release release; - // get the underlying raw memory py::buffer_info buffer = arr.request(); const T *ptr = static_cast(buffer.ptr); + // note: the conversions use OpenMP -> remove Python's Global Interpreter Lock + const py::gil_scoped_release release; + // check the memory layout of the Python Numpy array if constexpr (static_cast(Flags & py::array::c_style)) { // the provided Python Numpy array has C style layout @@ -167,7 +167,7 @@ struct type_caster> { * @throws py::value_error if @p obj is not a Numpy ndarray, Pandas DataFrame, SciPy sparse matrix, or Python 2D list * @throws py::value_error if the Numpy ndarray doesn't have a two-dimensional shape */ - bool load(handle obj, 
bool) { + bool load(py::handle obj, bool) { // special case py::list if (py::isinstance(obj)) { // provided obj is a Python list -> check if it is a correct py::list of py::list diff --git a/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp b/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp index 75445dce2..de1105077 100644 --- a/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp +++ b/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp @@ -79,7 +79,7 @@ struct type_caster> { * @param[in] matr the PLSSVM matrix to convert to a Numpy ndarray * @return a Pybind11 handle to the Numpy ndarray */ - static handle cast(const matrix_type &matr, return_value_policy, handle) { + static py::handle cast(const matrix_type &matr, py::return_value_policy, py::handle) { return py::cast(matr.matrix); } @@ -91,7 +91,7 @@ struct type_caster> { * @throws py::value_error all exceptions from the custom plssvm::matrix type caster * @throws py::value_error if not all column names are strings */ - bool load(handle obj, bool) { + bool load(py::handle obj, bool) { // convert the object to a plssvm::matrix value.matrix = obj.cast>(); diff --git a/bindings/Python/type_caster/mpi_type_caster.hpp b/bindings/Python/type_caster/mpi_type_caster.hpp new file mode 100644 index 000000000..35ff98ec9 --- /dev/null +++ b/bindings/Python/type_caster/mpi_type_caster.hpp @@ -0,0 +1,109 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements a custom type caster for a plssvm::mpi::communicator used with mpi4py. 
+ */ + +#ifndef PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MPI_TYPE_CASTER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MPI_TYPE_CASTER_HPP_ +#pragma once + +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi/mpi.h" // MPI_Comm, MPI_Comm_c2f, MPI_Comm_f2c, MPI_Fint +#endif + +#include "pybind11/cast.h" // pybind11::detail::type_caster +#include "pybind11/pybind11.h" // py::module, py::isinstance, py::none, py::return_value_policy, py::error_already_set +#include "pybind11/pytypes.h" // py::handle + +namespace py = pybind11; + +namespace pybind11::detail { + +/** + * @brief A custom Pybind11 type caster to convert Python object from and to a plssvm::mpi::communicator. + */ +template <> +struct type_caster { + public: + /// Specify the Python type name to which a plssvm::mpi::communicator should be converted. + PYBIND11_TYPE_CASTER(plssvm::mpi::communicator, _("MPI_Comm")); + + /** + * @brief Convert a plssvm::mpi::communicator to a mpi4py communicator. + * @param[in] comm the PLSSVM MPI communicator wrapper + * @return a Pybind11 handle to the mpi4py communicator + */ + static py::handle cast([[maybe_unused]] const plssvm::mpi::communicator &comm, py::return_value_policy, py::handle) { +#if defined(PLSSVM_HAS_MPI_ENABLED) + // we have MPI enabled + try { + // if we can find mpi4py, we can convert a MPI_Comm to its mpi4py representation + const py::module_ mpi4py = py::module_::import("mpi4py.MPI"); + return mpi4py.attr("Comm").attr("f2py")(MPI_Comm_c2f(static_cast(comm))).release(); + } catch (const py::error_already_set &) { + // we couldn't find mpi4py, so simply return None since anything else doesn't make sense + return py::none{}.release(); + } +#else + // we haven't MPI enabled -> return None since anything else doesn't make sense + return py::none{}.release(); +#endif + } + + /** + * @brief Try converting a Python object @p obj to a plssvm::mpi::communicator. 
+ * @param[in] obj the object to convert + * @return `true` if the conversion was successful, `false` otherwise + * @throws py::value_error if PLSSVM was built without MPI support, but a communicator was explicitly provided in Python + */ + bool load([[maybe_unused]] py::handle obj, bool) { +#if defined(PLSSVM_HAS_MPI_ENABLED) + try { + // check if we can find mpi4py + const py::module_ mpi4py = py::module_::import("mpi4py.MPI"); + // we found mpi4py -> check whether a communicator was provided + if (py::isinstance(obj, mpi4py.attr("Comm"))) { + // we got a mpi4py communicator -> cast it to an MPI_Comm handle and create a new plssvm::mpi::communicator + const MPI_Fint f_handle = obj.attr("py2f")().cast(); + value = plssvm::mpi::communicator{ MPI_Comm_f2c(f_handle) }; + return true; + } else { + // something else was provided -> abort type casting + return false; + } + } catch (const py::error_already_set &) { + // we couldn't find mpi4py + if (obj.is_none()) { + // but "comm" wasn't set -> we can use our default plssvm::mpi::communicator + value = plssvm::mpi::communicator{}; + return true; + } else { + // something was provided -> abort type casting + return false; + } + } +#else + // we haven't MPI enabled -> check whether the "comm" argument has been provided + if (!obj.is_none()) { + // "comm" has been provided -> we can't use it -> throw an exception + throw py::value_error{ "ERROR: an MPI communicator was explicitly provided, but PLSSVM was built without support for MPI!" 
}; + } else { + // "comm" was not provided -> use a default constructed plssvm::mpi::communicator that essentially does nothing + value = plssvm::mpi::communicator{}; + return true; + } +#endif + } +}; + +} // namespace pybind11::detail + +#endif // PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MPI_TYPE_CASTER_HPP_ diff --git a/bindings/Python/utility.hpp b/bindings/Python/utility.hpp index 66d5bb617..f795b4d2a 100644 --- a/bindings/Python/utility.hpp +++ b/bindings/Python/utility.hpp @@ -21,7 +21,7 @@ #include "fmt/format.h" // fmt::format #include "pybind11/numpy.h" // py::array, py::array_t, py::buffer_info, py::array::c_style -#include "pybind11/pybind11.h" // py::kwargs, py::value_error, py::isinstance, py::str, py::module_, py::register_exception_translator, py::set_error, py::object, py::len +#include "pybind11/pybind11.h" // py::kwargs, py::value_error, py::isinstance, py::str, py::module_, py::register_exception_translator, py::set_error, py::object, py::len, py::enum_, py::implicitly_convertible #include "pybind11/pytypes.h" // py::type, py::ssize_t #include // fixed-width integers @@ -102,57 +102,6 @@ inline void check_kwargs_for_correctness(const py::kwargs &args, const std::vect } } -/** - * @brief Convert the `gamma` Python kwargs @p args to an `plssvm::gamma_type` object. - * @note Assumes that @p args contains the keyword argument `gamma`! - * @param[in] args the Python keyword arguments - * @return the `plssvm::gamma_type` object filled with the keyword @p args (`[[nodiscard]]`) - */ -[[nodiscard]] inline plssvm::gamma_type convert_gamma_kwarg_to_variant(const py::kwargs &args) { - if (py::isinstance(args["gamma"])) { - // found a string - const auto str = args["gamma"].cast(); - std::istringstream is{ str }; - plssvm::gamma_type gamma; - is >> gamma; - if (is.fail()) { - throw py::value_error{ fmt::format("When 'gamma' is a string, it should be either 'scale' or 'auto'. 
Got '{}' instead.", gamma) }; - } - return gamma; - } else { - const auto gamma = args["gamma"].cast(); - if (gamma <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ fmt::format("gamma value must be > 0; {} is invalid. Use a positive number or use 'scale' or 'auto'.", gamma) }; - } - return gamma; - } -} - -/** - * @brief Convert the Python kwargs @p args to an `plssvm::parameter` object. - * @param[in] args the Python keyword arguments - * @param[in] params the baseline parameter - * @return the `plssvm::parameter` object filled with the keyword @p args (`[[nodiscard]]`) - */ -[[nodiscard]] inline plssvm::parameter convert_kwargs_to_parameter(const py::kwargs &args, plssvm::parameter params = {}) { - if (args.contains("kernel_type")) { - params.kernel_type = args["kernel_type"].cast(); - } - if (args.contains("degree")) { - params.degree = args["degree"].cast(); - } - if (args.contains("gamma")) { - params.gamma = convert_gamma_kwarg_to_variant(args); - } - if (args.contains("coef0")) { - params.coef0 = args["coef0"].cast(); - } - if (args.contains("cost")) { - params.cost = args["cost"].cast(); - } - return params; -} - /** * @brief Register the PLSSVM @p Exception type as an Python exception with the @p py_exception_name derived from @p BaseException. * @tparam Exception the PLSSVM exception to register in Python @@ -175,6 +124,30 @@ void register_py_exception(py::module_ &m, const std::string &py_exception_name, }); } +/** + * @brief Register the enumeration @p EnumType to be implicitly convertible from a Python string. 
+ * @tparam EnumType the type of the C++ enumeration + * @param[in] py_enum the Pybind11 enumeration wrapper + * @throws py::value_error if the provided string is invalid for the @p EnumType + */ +template +void register_implicit_str_enum_conversion(py::enum_ &py_enum) { + // create the custom constructor + py_enum.def(py::init([](const std::string &str) -> EnumType { + std::istringstream iss{ str }; + EnumType e; + iss >> e; + if (iss.fail()) { + throw py::value_error{}; + } else { + return e; + } + })); + + // register the implicit conversion + py::implicitly_convertible(); +} + /** * @def PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING * @brief Map the @p type to its Numpy type name pendant @p numpy_name. diff --git a/bindings/Python/verbosity_levels.cpp b/bindings/Python/verbosity_levels.cpp index aaa8cf364..05c9dfe20 100644 --- a/bindings/Python/verbosity_levels.cpp +++ b/bindings/Python/verbosity_levels.cpp @@ -8,28 +8,36 @@ #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, bitwise operator overloads, plssvm::verbosity +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion + +#include "pybind11/cast.h" #include "pybind11/operators.h" // pybind operator overloading -#include "pybind11/pybind11.h" // py::module_ +#include "pybind11/pybind11.h" // py::module_, py::enum_, py::self, py::arg namespace py = pybind11; void init_verbosity_levels(py::module_ &m) { // bind enum class - py::enum_ verb_enum(m, "VerbosityLevel", "Enum class for all possible verbosity levels used in our own logging infrastructure."); - verb_enum.value("QUIET", plssvm::verbosity_level::quiet, "nothing is logged to the standard output to stdout") + py::enum_ py_enum(m, "VerbosityLevel", "Enum class for all possible verbosity levels used in our own logging infrastructure."); + py_enum + .value("QUIET", plssvm::verbosity_level::quiet, "nothing is logged to the standard output to stdout") .value("LIBSVM", 
plssvm::verbosity_level::libsvm, "log the same messages as LIBSVM (used for better LIBSVM conformity) to stdout") .value("TIMING", plssvm::verbosity_level::timing, "log all messages related to timing information to stdout") .value("WARNING", plssvm::verbosity_level::warning, "log all messages related to warning to stderr") .value("FULL", plssvm::verbosity_level::full, "log all messages to stdout"); // bind the bitwise operations - verb_enum.def(py::self | py::self) + py_enum + .def(py::self | py::self) .def(py::self |= py::self) .def(py::self & py::self) .def(py::self &= py::self); + // enable implicit conversion from string to enum + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum); + // enable or disable verbose output m.def("quiet", []() { plssvm::verbosity = plssvm::verbosity_level::quiet; }, "no command line output is made during calls to PLSSVM functions"); m.def("get_verbosity", []() { return plssvm::verbosity; }, "get the currently set verbosity level for all PLSSVM outputs to stdout"); - m.def("set_verbosity", [](const plssvm::verbosity_level verb) { plssvm::verbosity = verb; }, "set the verbosity level for all PLSSVM outputs to stdout"); + m.def("set_verbosity", [](const plssvm::verbosity_level verb) { plssvm::verbosity = verb; }, "set the verbosity level for all PLSSVM outputs to stdout", py::arg("verbosity")); } diff --git a/bindings/Python/version/version.cpp b/bindings/Python/version/version.cpp deleted file mode 100644 index 356f6e786..000000000 --- a/bindings/Python/version/version.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "plssvm/version/version.hpp" // plssvm::version::{name, version, major, minor, patch} - -#include "pybind11/pybind11.h" // py::module_, py::class_, py::object -#include "pybind11/stl.h" // support for STL types: std::string - -namespace py = pybind11; - -// dummy class -class version { }; - -void init_version(py::module_ &m) { - // bind global version information - // complexity necessary to enforce read-only (py::object necessary for def_property_readonly_static) - py::class_(m, "version", "A version class encapsulation all PLSSVM version information.") - .def_property_readonly_static("name", [](const py::object &) { return plssvm::version::name; }, "the name of the PLSSVM library") - .def_property_readonly_static("version", [](const py::object &) { return plssvm::version::version; }, "the used version of the PLSSVM library") - .def_property_readonly_static("major", [](const py::object &) { return plssvm::version::major; }, "the used major version of the PLSSVM library") - .def_property_readonly_static("minor", [](const py::object &) { return plssvm::version::minor; }, "the used minor version of the PLSSVM library") - .def_property_readonly_static("patch", [](const py::object &) { return plssvm::version::patch; }, "the used patch version of the PLSSVM library"); -} diff --git a/cmake/add_coverage_build_type.cmake b/cmake/add_coverage_build_type.cmake index 4b55fcb33..598d6dd18 100644 --- a/cmake/add_coverage_build_type.cmake +++ b/cmake/add_coverage_build_type.cmake @@ -4,15 +4,22 @@ # See the LICENSE.md file in the project root for full license information. 
######################################################################################################################## +# define the used compiler and linker flags for the coverage target +set(PLSSVM_COVERAGE_COMPILER_FLAGS + "-O0 -g --coverage -fprofile-abs-path -fno-inline -fno-inline-functions -fno-inline-small-functions -fno-elide-constructors -fno-common -ffunction-sections -fno-omit-frame-pointer" +) +set(PLSSVM_COVERAGE_LINKER_FLAGS "-O0 -g --coverage -fno-lto -lgcov") +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "" FORCE) + # add new coverage build type -set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage" CACHE STRING "Flags used by the C++ compiler during coverage builds." - FORCE +set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} ${PLSSVM_COVERAGE_COMPILER_FLAGS}" CACHE STRING "Flags used by the C++ compiler during coverage builds." + FORCE ) -set(CMAKE_C_FLAGS_COVERAGE "${CMAKE_C_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage" CACHE STRING "Flags used by the C compiler during coverage builds." FORCE) -set(CMAKE_EXE_LINKER_FLAGS_COVERAGE "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -lgcov" +set(CMAKE_C_FLAGS_COVERAGE "${CMAKE_C_FLAGS_DEBUG} ${PLSSVM_COVERAGE_COMPILER_FLAGS}" CACHE STRING "Flags used by the C compiler during coverage builds." FORCE) +set(CMAKE_EXE_LINKER_FLAGS_COVERAGE "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${PLSSVM_COVERAGE_LINKER_FLAGS} -lgcov" CACHE STRING "Flags used for linking binaries during coverage builds." FORCE ) -set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -lgcov" +set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} ${PLSSVM_COVERAGE_LINKER_FLAGS} -lgcov" CACHE STRING "Flags used by the shared libraries linker during coverage builds." 
FORCE ) mark_as_advanced(CMAKE_CXX_FLAGS_COVERAGE CMAKE_C_FLAGS_COVERAGE CMAKE_EXE_LINKER_FLAGS_COVERAGE CMAKE_SHARED_LINKER_FLAGS_COVERAGE) diff --git a/cmake/assemble_icpx_sycl_target_flags.cmake b/cmake/assemble_icpx_sycl_target_flags.cmake new file mode 100644 index 000000000..a83af9967 --- /dev/null +++ b/cmake/assemble_icpx_sycl_target_flags.cmake @@ -0,0 +1,98 @@ +# Authors: Alexander Van Craen, Marcel Breyer +# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +# License: This file is part of the PLSSVM project which is released under the MIT license. +# See the LICENSE.md file in the project root for full license information. +######################################################################################################################## + +# function to check whether a string starts with a substring +function (startswith out_var string prefix) + string(FIND "${string}" "${prefix}" pos) + if (pos EQUAL 0) + set(${out_var} ON PARENT_SCOPE) + else () + set(${out_var} OFF PARENT_SCOPE) + endif () +endfunction () + +# function to assemble the icpx compiler flags and add them to the target +function (assemble_icpx_sycl_target_flags target scope) + set(_fsycl_target_archs "") + + # CPU targets + if (DEFINED PLSSVM_CPU_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND _fsycl_target_archs "spir64_x86_64") + + # set target arch explicitly + if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) + target_link_options(${target} ${scope} -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + endif () + endif () + + # NVIDIA GPU targets + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # assemble -fsycl-targets + foreach (_arch ${PLSSVM_NVIDIA_TARGET_ARCHS}) + list(APPEND _fsycl_target_archs "nvidia_gpu_${_arch}") + endforeach () + + # add lineinfo for easier profiling + target_link_options(${target} ${scope} -Xcuda-ptxas -lineinfo) + # add verbose kernel compilation information to output if in Debug mode + target_link_options(${target} 
${scope} $<$:-Xcuda-ptxas --verbose>) + endif () + + # AMD GPU targets + if (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # assemble -fsycl-targets + foreach (_arch ${PLSSVM_AMD_TARGET_ARCHS}) + list(APPEND _fsycl_target_archs "amd_gpu_${_arch}") + endforeach () + endif () + + # Intel GPU targets + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # iterate over all target archs and check how many of them are provided as HEX values + set(PLSSVM_INTEL_TARGET_ARCH_NUM_HEX 0) + foreach (_arch ${PLSSVM_INTEL_TARGET_ARCHS}) + # test whether the arch is a hex value + startswith(PLSSVM_INTEL_TARGET_ARCH_IS_HEX "${_arch}" "0x") + if (PLSSVM_INTEL_TARGET_ARCH_IS_HEX) + math(EXPR PLSSVM_INTEL_TARGET_ARCH_NUM_HEX "${PLSSVM_INTEL_TARGET_ARCH_NUM_HEX} + 1") + endif () + endforeach () + + # either ALL targets must be provided as HEX values or NONE + if (PLSSVM_INTEL_TARGET_ARCH_NUM_HEX EQUAL 0) + # no architecture was provided as HEX value -> use new shortcuts + foreach (_arch ${PLSSVM_INTEL_TARGET_ARCHS}) + list(APPEND _fsycl_target_archs "intel_gpu_${_arch}") + endforeach () + elseif (PLSSVM_INTEL_TARGET_ARCH_NUM_HEX EQUAL PLSSVM_NUM_INTEL_TARGET_ARCHS) + if (PLSSVM_NUM_INTEL_TARGET_ARCHS GREATER 1) + message( + FATAL_ERROR + "When specifying the Intel architectures with HEX values, only a single architecture is supported but ${PLSSVM_NUM_INTEL_TARGET_ARCHS} were provided!"
+ ) + endif () + # use old way to specify architectures + list(APPEND _fsycl_target_archs "spir64_gen") + list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," PLSSVM_INTEL_TARGET_ARCHS_STRING) + target_compile_options(${target} ${scope} -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + target_link_options(${target} ${scope} -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + else () + message( + FATAL_ERROR + "The provided Intel GPU target architectures (${PLSSVM_INTEL_TARGET_ARCHS}) are a mixture between device IDs (hex values) and architecture names but only either of them are supported!" + ) + endif () + endif () + + # apply -fsycl-targets + list(JOIN _fsycl_target_archs "," _fsycl_target_archs_string) + if (NOT _fsycl_target_archs_string STREQUAL "") + message(STATUS "Compiling for -fsycl-targets=${_fsycl_target_archs}") + target_compile_options(${target} ${scope} -fsycl-targets=${_fsycl_target_archs_string}) + target_link_options(${target} ${scope} -fsycl-targets=${_fsycl_target_archs_string}) + endif () +endfunction () diff --git a/cmake/plssvm/plssvmAdaptiveCppTargets.cmake b/cmake/plssvm/plssvmAdaptiveCppTargets.cmake deleted file mode 100644 index 8c124701e..000000000 --- a/cmake/plssvm/plssvmAdaptiveCppTargets.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# Authors: Alexander Van Craen, Marcel Breyer -# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved -# License: This file is part of the PLSSVM project which is released under the MIT license. -# See the LICENSE.md file in the project root for full license information. 
-######################################################################################################################## - -include(CMakeFindDependencyMacro) - -# check if the AdaptiveCpp backend is available -if (TARGET plssvm::plssvm-SYCL_adaptivecpp) - # enable AdaptiveCpp - find_dependency(AdaptiveCpp CONFIG) - # set alias targets - add_library(plssvm::AdaptiveCpp ALIAS plssvm::plssvm-SYCL_adaptivecpp) - add_library(plssvm::adaptivecpp ALIAS plssvm::plssvm-SYCL_adaptivecpp) - # set COMPONENT to be found - set(plssvm_AdaptiveCpp_FOUND ON) -else () - # set COMPONENT to be NOT found - set(plssvm_AdaptiveCpp_FOUND OFF) -endif () diff --git a/cmake/plssvm/plssvmAdaptiveCppTargets.cmake.in b/cmake/plssvm/plssvmAdaptiveCppTargets.cmake.in new file mode 100644 index 000000000..c61268cfd --- /dev/null +++ b/cmake/plssvm/plssvmAdaptiveCppTargets.cmake.in @@ -0,0 +1,54 @@ +# Authors: Alexander Van Craen, Marcel Breyer +# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +# License: This file is part of the PLSSVM project which is released under the MIT license. +# See the LICENSE.md file in the project root for full license information. 
+######################################################################################################################## + +include(CMakeFindDependencyMacro) + +# check if AdaptiveCpp is an optional component +is_component_optional(AdaptiveCpp) + +# check if the AdaptiveCpp backend is available +if (TARGET plssvm::plssvm-SYCL_AdaptiveCpp) + # set ACPP_TARGETS + set(ACPP_TARGETS "@ACPP_TARGETS@") + if (NOT plssvm_FIND_QUIETLY) + message(STATUS "Setting ACPP_TARGETS to \"${ACPP_TARGETS}\".") + endif () + # enable AdaptiveCpp + find_dependency(AdaptiveCpp CONFIG) + + # set alias targets + add_library(plssvm::AdaptiveCpp ALIAS plssvm::plssvm-SYCL_AdaptiveCpp) + add_library(plssvm::adaptivecpp ALIAS plssvm::plssvm-SYCL_AdaptiveCpp) + + # set COMPONENT to be found + set(plssvm_AdaptiveCpp_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_AdaptiveCpp) + message(STATUS "Found optional component \"AdaptiveCpp\".") + else () + message(STATUS "Found component \"AdaptiveCpp\".") + endif () + endif () +else () + # set COMPONENT to be NOT found + set(plssvm_AdaptiveCpp_FOUND OFF) + # PLSSVM is only not found if AdaptiveCpp is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_AdaptiveCpp) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_AdaptiveCpp) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"AdaptiveCpp\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_AdaptiveCpp) + message(STATUS "Couldn't find optional component \"AdaptiveCpp\".") + else () + message(STATUS "Couldn't find component \"AdaptiveCpp\".") + endif () + endif () +endif () diff --git a/cmake/plssvm/plssvmCUDATargets.cmake b/cmake/plssvm/plssvmCUDATargets.cmake index 6f8362e90..5e98953c1 100644 --- a/cmake/plssvm/plssvmCUDATargets.cmake +++ b/cmake/plssvm/plssvmCUDATargets.cmake @@ -6,17 +6,45 @@ include(CMakeFindDependencyMacro) +# check if 
CUDA is an optional component +is_component_optional(CUDA) + # check if the CUDA backend is available if (TARGET plssvm::plssvm-CUDA) # enable CUDA enable_language(CUDA) find_dependency(CUDAToolkit) + # set alias targets add_library(plssvm::CUDA ALIAS plssvm::plssvm-CUDA) add_library(plssvm::cuda ALIAS plssvm::plssvm-CUDA) + # set COMPONENT to be found set(plssvm_CUDA_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_CUDA) + message(STATUS "Found optional component \"CUDA\".") + else () + message(STATUS "Found component \"CUDA\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_CUDA_FOUND OFF) + # PLSSVM is only not found if CUDA is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_CUDA) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_CUDA) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"CUDA\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_CUDA) + message(STATUS "Couldn't find optional component \"CUDA\".") + else () + message(STATUS "Couldn't find component \"CUDA\".") + endif () + endif () endif () diff --git a/cmake/plssvm/plssvmConfig.cmake.in b/cmake/plssvm/plssvmConfig.cmake.in index beb6801bc..bb393805e 100644 --- a/cmake/plssvm/plssvmConfig.cmake.in +++ b/cmake/plssvm/plssvmConfig.cmake.in @@ -6,14 +6,35 @@ @PACKAGE_INIT@ +# helper function to determine whether a component is requested via OPTIONAL_COMPONENTS +function (is_component_optional comp) + if (plssvm_FIND_COMPONENTS AND NOT plssvm_FIND_REQUIRED_${comp}) + set(plssvm_FIND_OPTIONAL_${comp} ON PARENT_SCOPE) + else () + set(plssvm_FIND_OPTIONAL_${comp} OFF PARENT_SCOPE) + endif () +endfunction () + include(CMakeFindDependencyMacro) -# check if the OpenMP is required for the library utilities +# check if the OpenMP library is required for the library utilities set(PLSSVM_HAS_OPENMP_UTILITY 
@PLSSVM_FOUND_OPENMP_FOR_UTILITY@) if (PLSSVM_HAS_OPENMP_UTILITY) find_dependency(OpenMP) endif () +# check if the MPI library is required for distributed memory support +set(PLSSVM_HAS_MPI @PLSSVM_FOUND_MPI@) +if (PLSSVM_HAS_MPI) + find_dependency(MPI) +endif () + +# check if hws was used +set(PLSSVM_HAS_HWS @PLSSVM_ENABLE_HARDWARE_SAMPLING@) +if (PLSSVM_HAS_HWS) + find_dependency(hws) +endif () + # always try finding {fmt} # -> CMAKE_PREFIX_PATH necessary if build via FetchContent # -> doesn't hurt to be set everytime @@ -26,66 +47,70 @@ include("${CMAKE_CURRENT_LIST_DIR}/plssvmTargets.cmake") # list all available libraries set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;HPX;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;Kokkos;stdpar") -set(PLSSVM_DISABLED_COMPONENTS "${PLSSVM_SUPPORTED_COMPONENTS}") - -# check which libraries are available -set(PLSSVM_ENABLED_COMPONENTS) -foreach (_comp ${PLSSVM_SUPPORTED_COMPONENTS}) - # check "normal" components - if (TARGET plssvm::plssvm-${_comp}) - list(APPEND PLSSVM_ENABLED_COMPONENTS ${_comp}) - else () - # check for SYCL component - string(TOLOWER ${_comp} lower_case_comp) - if (TARGET plssvm::plssvm-SYCL_${lower_case_comp}) - list(APPEND PLSSVM_ENABLED_COMPONENTS ${_comp}) - endif () - endif () -endforeach () -# no component are provided -> load everything +# assume PLSSVM can be found -> will be overwritten with OFF if it isn't the case +set(plssvm_FOUND ON) + if (NOT plssvm_FIND_COMPONENTS) - set(plssvm_FIND_COMPONENTS ${PLSSVM_ENABLED_COMPONENTS}) + # no components were requested -> try finding everything that was enabled in PLSSVM + foreach (_comp ${PLSSVM_SUPPORTED_COMPONENTS}) + if (TARGET plssvm::plssvm-${_comp} OR TARGET plssvm::plssvm-SYCL_${_comp}) + # target exists -> try enabling it + include("${CMAKE_CURRENT_LIST_DIR}/plssvm${_comp}Targets.cmake") + else () + # target doesn't exist -> set it to off + set(plssvm_${_comp}_FOUND OFF) + endif () + endforeach () +else () + # check whether all provided components are valid + foreach 
(_comp ${plssvm_FIND_COMPONENTS}) + if (NOT ";${PLSSVM_SUPPORTED_COMPONENTS};" MATCHES ";${_comp};") + set(plssvm_FOUND OFF) + set(plssvm_NOT_FOUND_MESSAGE "Unknown component \"${_comp}\" requested by find_package(plssvm).") + return() + endif() + endforeach () + # check whether the requested components can be enabled + foreach (_comp ${PLSSVM_SUPPORTED_COMPONENTS}) + if (${_comp} IN_LIST plssvm_FIND_COMPONENTS) + # target exists -> try enabling it + include("${CMAKE_CURRENT_LIST_DIR}/plssvm${_comp}Targets.cmake") + else () + # target doesn't exist -> set it to off + set(plssvm_${_comp}_FOUND OFF) + endif () + endforeach () endif () +# check possible provided version +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(plssvm + REQUIRED_VARS plssvm_FOUND + VERSION_VAR plssvm_VERSION +) + +# add additional output if PLSSVM could be found +if (NOT plssvm_FIND_QUIETLY AND plssvm_FOUND) + # output the available PLSSVM target platforms + message(STATUS "The available PLSSVM_TARGET_PLATFORMS are: @PLSSVM_TARGET_PLATFORMS@.") + # output the available PLSSVM backends + message(STATUS "The available PLSSVM backends are: @PLSSVM_BACKEND_NAME_LIST@.") + # output the PLSSVM build type + message(STATUS "The PLSSVM library was built in @CMAKE_BUILD_TYPE@ mode.") + # output if assertions are enabled + if (@PLSSVM_ENABLE_ASSERTS@) + message(STATUS "The PLSSVM library was built with asserts enabled.") + endif () + # output if performance tracking is enabled + if (@PLSSVM_ENABLE_PERFORMANCE_TRACKING@) + message(STATUS "The PLSSVM library was built with performance tracking enabled.") + endif () +endif() + # set convenience alias targets add_library(plssvm::all ALIAS plssvm::plssvm-all) add_library(plssvm::plssvm ALIAS plssvm::plssvm-all) -# load the library components -foreach (_comp ${plssvm_FIND_COMPONENTS}) - if (NOT ";${PLSSVM_SUPPORTED_COMPONENTS};" MATCHES ";${_comp};") - set(plssvm_FOUND OFF) - set(plssvm_NOT_FOUND_MESSAGE "Unsupported component: 
${_comp}") - elseif (NOT ";${PLSSVM_ENABLED_COMPONENTS};" MATCHES ";${_comp};") - set(plssvm_FOUND OFF) - set(plssvm_NOT_FOUND_MESSAGE "Component ${_comp} wasn't enabled!") - else () - # set component specific variables - if (${_comp} MATCHES "AdaptiveCpp") - set(ACPP_TARGETS @ACPP_TARGETS@) - elseif (${_comp} MATCHES "stdpar") - set(PLSSVM_STDPAR_BACKEND @PLSSVM_STDPAR_BACKEND@) - if (PLSSVM_STDPAR_BACKEND MATCHES "roc-stdpar") - set(PLSSVM_STDPAR_BACKEND_HIPSTDPAR_PATH "@PLSSVM_STDPAR_BACKEND_HIPSTDPAR_PATH@") - endif () - set(CMAKE_CXX_FLAGS "@CMAKE_CXX_FLAGS@") - if (CMAKE_CXX_FLAGS) - message(STATUS "Setting CMAKE_CXX_FLAGS for the plssvm::stdpar backend to: \"${CMAKE_CXX_FLAGS}\"") - endif () - endif () - - # include the component specific config file - include("${CMAKE_CURRENT_LIST_DIR}/plssvm${_comp}Targets.cmake") - # remove the found element from the list - list(REMOVE_ITEM PLSSVM_DISABLED_COMPONENTS "${_comp}") - endif() -endforeach () - -# set the remaining components to OFF -foreach (_comp ${PLSSVM_DISABLED_COMPONENTS}) - set(plssvm_${_comp}_FOUND OFF) -endforeach () - # sanity checks check_required_components("plssvm") diff --git a/cmake/plssvm/plssvmDPCPPTargets.cmake b/cmake/plssvm/plssvmDPCPPTargets.cmake index 403585807..482e1a451 100644 --- a/cmake/plssvm/plssvmDPCPPTargets.cmake +++ b/cmake/plssvm/plssvmDPCPPTargets.cmake @@ -6,14 +6,41 @@ include(CMakeFindDependencyMacro) +# check if DPCPP is an optional component +is_component_optional(DPCPP) + # check if the AdaptiveCpp backend is available -if (TARGET plssvm::plssvm-SYCL_dpcpp) +if (TARGET plssvm::plssvm-SYCL_DPCPP) # set alias targets - add_library(plssvm::DPCPP ALIAS plssvm::plssvm-SYCL_dpcpp) - add_library(plssvm::dpcpp ALIAS plssvm::plssvm-SYCL_dpcpp) + add_library(plssvm::DPCPP ALIAS plssvm::plssvm-SYCL_DPCPP) + add_library(plssvm::dpcpp ALIAS plssvm::plssvm-SYCL_DPCPP) + # set COMPONENT to be found set(plssvm_DPCPP_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if 
(plssvm_FIND_OPTIONAL_DPCPP) + message(STATUS "Found optional component \"DPCPP\".") + else () + message(STATUS "Found component \"DPCPP\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_DPCPP_FOUND OFF) + # PLSSVM is only not found if DPCPP is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_DPCPP) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_DPCPP) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"DPCPP\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_DPCPP) + message(STATUS "Couldn't find optional component \"DPCPP\".") + else () + message(STATUS "Couldn't find component \"DPCPP\".") + endif () + endif () endif () diff --git a/cmake/plssvm/plssvmHIPTargets.cmake.in b/cmake/plssvm/plssvmHIPTargets.cmake.in index 3dd527bb9..b9b7c174b 100644 --- a/cmake/plssvm/plssvmHIPTargets.cmake.in +++ b/cmake/plssvm/plssvmHIPTargets.cmake.in @@ -6,17 +6,50 @@ include(CMakeFindDependencyMacro) +# check if HIP is an optional component +is_component_optional(HIP) + # check if the HIP backend is available if (TARGET plssvm::plssvm-HIP) # enable HIP or CUDA + if (NOT plssvm_FIND_QUIETLY) + message(STATUS "Using @PLSSVM_HIP_BACKEND_GPU_RUNTIME@ as HIP runtime.") + endif () enable_language(@PLSSVM_HIP_BACKEND_GPU_RUNTIME@) - find_dependency(HIP REQUIRED) + if (@PLSSVM_HIP_BACKEND_GPU_RUNTIME@ STREQUAL "HIP") + find_dependency(HIP REQUIRED) + endif () + # set alias targets add_library(plssvm::HIP ALIAS plssvm::plssvm-HIP) add_library(plssvm::hip ALIAS plssvm::plssvm-HIP) + # set COMPONENT to be found set(plssvm_HIP_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_HIP) + message(STATUS "Found optional component \"HIP\".") + else () + message(STATUS "Found component \"HIP\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_HIP_FOUND OFF) + # PLSSVM is only not 
found if HIP is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_HIP) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_HIP) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"HIP\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_HIP) + message(STATUS "Couldn't find optional component \"HIP\".") + else () + message(STATUS "Couldn't find component \"HIP\".") + endif () + endif () endif () \ No newline at end of file diff --git a/cmake/plssvm/plssvmHPXTargets.cmake b/cmake/plssvm/plssvmHPXTargets.cmake index 8cdda54e2..48c1e5862 100644 --- a/cmake/plssvm/plssvmHPXTargets.cmake +++ b/cmake/plssvm/plssvmHPXTargets.cmake @@ -6,16 +6,44 @@ include(CMakeFindDependencyMacro) +# check if HPX is an optional component +is_component_optional(HPX) + # check if the HPX backend is available if (TARGET plssvm::plssvm-HPX) # enable HPX find_dependency(HPX) + # set alias targets add_library(plssvm::HPX ALIAS plssvm::plssvm-HPX) add_library(plssvm::hpx ALIAS plssvm::plssvm-HPX) + # set COMPONENT to be found set(plssvm_HPX_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_HPX) + message(STATUS "Found optional component \"HPX\".") + else () + message(STATUS "Found component \"HPX\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_HPX_FOUND OFF) + # PLSSVM is only not found if HPX is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_HPX) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_HPX) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"HPX\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_HPX) + message(STATUS "Couldn't find optional component \"HPX\".") + else () + message(STATUS "Couldn't find component \"HPX\".") + endif () + endif () endif 
() diff --git a/cmake/plssvm/plssvmKokkosTargets.cmake b/cmake/plssvm/plssvmKokkosTargets.cmake index ea720dfa1..1b4284413 100644 --- a/cmake/plssvm/plssvmKokkosTargets.cmake +++ b/cmake/plssvm/plssvmKokkosTargets.cmake @@ -6,16 +6,44 @@ include(CMakeFindDependencyMacro) +# check if Kokkos is an optional component +is_component_optional(Kokkos) + # check if the Kokkos backend is available if (TARGET plssvm::plssvm-Kokkos) # enable Kokkos find_dependency(Kokkos CONFIG) + # set alias targets add_library(plssvm::Kokkos ALIAS plssvm::plssvm-Kokkos) add_library(plssvm::kokkos ALIAS plssvm::plssvm-Kokkos) + # set COMPONENT to be found set(plssvm_Kokkos_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_Kokkos) + message(STATUS "Found optional component \"Kokkos\".") + else () + message(STATUS "Found component \"Kokkos\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_Kokkos_FOUND OFF) + # PLSSVM is only not found if Kokkos is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_Kokkos) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_Kokkos) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"Kokkos\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_Kokkos) + message(STATUS "Couldn't find optional component \"Kokkos\".") + else () + message(STATUS "Couldn't find component \"Kokkos\".") + endif () + endif () endif () diff --git a/cmake/plssvm/plssvmOpenCLTargets.cmake b/cmake/plssvm/plssvmOpenCLTargets.cmake index 5e3c01344..b7252f09d 100644 --- a/cmake/plssvm/plssvmOpenCLTargets.cmake +++ b/cmake/plssvm/plssvmOpenCLTargets.cmake @@ -6,16 +6,44 @@ include(CMakeFindDependencyMacro) +# check if OpenCL is an optional component +is_component_optional(OpenCL) + # check if the OpenCL backend is available if (TARGET plssvm::plssvm-OpenCL) # enable OpenCL find_dependency(OpenCL) + # set alias 
targets add_library(plssvm::OpenCL ALIAS plssvm::plssvm-OpenCL) add_library(plssvm::opencl ALIAS plssvm::plssvm-OpenCL) + # set COMPONENT to be found set(plssvm_OpenCL_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_OpenCL) + message(STATUS "Found optional component \"OpenCL\".") + else () + message(STATUS "Found component \"OpenCL\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_OpenCL_FOUND OFF) + # PLSSVM is only not found if OpenCL is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_OpenCL) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_OpenCL) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"OpenCL\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_OpenCL) + message(STATUS "Couldn't find optional component \"OpenCL\".") + else () + message(STATUS "Couldn't find component \"OpenCL\".") + endif () + endif () endif () diff --git a/cmake/plssvm/plssvmOpenMPTargets.cmake b/cmake/plssvm/plssvmOpenMPTargets.cmake index 256a46eea..ba9a201bf 100644 --- a/cmake/plssvm/plssvmOpenMPTargets.cmake +++ b/cmake/plssvm/plssvmOpenMPTargets.cmake @@ -6,16 +6,44 @@ include(CMakeFindDependencyMacro) +# check if OpenMP is an optional component +is_component_optional(OpenMP) + # check if the OpenMP backend is available if (TARGET plssvm::plssvm-OpenMP) # enable OpenMP find_dependency(OpenMP) + # set alias targets add_library(plssvm::OpenMP ALIAS plssvm::plssvm-OpenMP) add_library(plssvm::openmp ALIAS plssvm::plssvm-OpenMP) + # set COMPONENT to be found set(plssvm_OpenMP_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_OpenMP) + message(STATUS "Found optional component \"OpenMP\".") + else () + message(STATUS "Found component \"OpenMP\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_OpenMP_FOUND OFF) + # PLSSVM is only not found if 
OpenMP is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_OpenMP) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_OpenMP) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"OpenMP\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_OpenMP) + message(STATUS "Couldn't find optional component \"OpenMP\".") + else () + message(STATUS "Couldn't find component \"OpenMP\".") + endif () + endif () endif () diff --git a/cmake/plssvm/plssvmstdparTargets.cmake b/cmake/plssvm/plssvmstdparTargets.cmake.in similarity index 67% rename from cmake/plssvm/plssvmstdparTargets.cmake rename to cmake/plssvm/plssvmstdparTargets.cmake.in index 91eaeb9bb..1c706c28c 100644 --- a/cmake/plssvm/plssvmstdparTargets.cmake +++ b/cmake/plssvm/plssvmstdparTargets.cmake.in @@ -6,19 +6,29 @@ include(CMakeFindDependencyMacro) +# check if stdpar is an optional component +is_component_optional(stdpar) + # check if the stdpar backend is available if (TARGET plssvm::plssvm-stdpar) + # configure variables + set(PLSSVM_STDPAR_BACKEND @PLSSVM_STDPAR_BACKEND@) + set(CMAKE_CXX_FLAGS "@PLSSVM_ESCAPED_CXX_FLAGS@") + if (CMAKE_CXX_FLAGS AND NOT plssvm_FIND_QUIETLY) + message(STATUS "Setting CMAKE_CXX_FLAGS for the plssvm::stdpar backend to: \"${CMAKE_CXX_FLAGS}\"") + endif () + # enable stdpar based on the used stdpar implementation include(CheckCXXCompilerFlag) if (PLSSVM_STDPAR_BACKEND MATCHES "NVHPC") - enable_language(CUDA) - find_dependency(CUDAToolkit) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") set(plssvm_FOUND OFF) set(plssvm_stdpar_FOUND OFF) set(plssvm_NOT_FOUND_MESSAGE "The CMAKE_CXX_COMPILER must be set to NVIDIA's HPC SDK compiler (nvc++) in user code in order to use plssvm::stdpar!") return() endif () + enable_language(CUDA) + find_dependency(CUDAToolkit) elseif (PLSSVM_STDPAR_BACKEND MATCHES "roc-stdpar") check_cxx_compiler_flag("--hipstdpar" 
PLSSVM_HAS_HIPSTDPAR_STDPAR_FLAG) if (NOT PLSSVM_HAS_HIPSTDPAR_STDPAR_FLAG) @@ -30,7 +40,6 @@ if (TARGET plssvm::plssvm-stdpar) return() endif () elseif (PLSSVM_STDPAR_BACKEND MATCHES "IntelLLVM") - find_dependency(oneDPL) check_cxx_compiler_flag("-fsycl-pstl-offload" PLSSVM_HAS_INTEL_LLVM_STDPAR_FLAG) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM" AND NOT PLSSVM_HAS_INTEL_LLVM_STDPAR_FLAG) set(plssvm_FOUND OFF) @@ -38,8 +47,8 @@ if (TARGET plssvm::plssvm-stdpar) set(plssvm_NOT_FOUND_MESSAGE "The CMAKE_CXX_COMPILER must be set to the Intel LLVM compiler (icpx) in user code in order to use plssvm::stdpar!") return() endif () + find_dependency(oneDPL) elseif (PLSSVM_STDPAR_BACKEND MATCHES "ACPP") - find_dependency(TBB) check_cxx_compiler_flag("--acpp-stdpar" PLSSVM_HAS_ACPP_STDPAR_FLAG) if (NOT PLSSVM_HAS_ACPP_STDPAR_FLAG) set(plssvm_FOUND OFF) @@ -47,22 +56,54 @@ if (TARGET plssvm::plssvm-stdpar) set(plssvm_NOT_FOUND_MESSAGE "The CMAKE_CXX_COMPILER must be set to the AdaptiveCpp compiler (acpp) in user code in order to use plssvm::stdpar!") return() endif () - elseif (PLSSVM_STDPAR_BACKEND MATCHES "GNU_TBB") find_dependency(TBB) - find_dependency(Boost COMPONENTS atomic) + set(ACPP_TARGETS "@ACPP_TARGETS@") + if (NOT plssvm_FIND_QUIETLY) + message(STATUS "Setting ACPP_TARGETS to \"${ACPP_TARGETS}\"") + endif () + elseif (PLSSVM_STDPAR_BACKEND MATCHES "GNU_TBB") if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(plssvm_FOUND OFF) set(plssvm_stdpar_FOUND OFF) set(plssvm_NOT_FOUND_MESSAGE "The CMAKE_CXX_COMPILER must be set to GNU GCC in user code in order to use plssvm::stdpar!") return() endif () + find_dependency(TBB) + find_dependency(Boost COMPONENTS atomic) + else () + message(FATAL_ERROR "Unrecognized stdpar implementation!") endif () # set alias targets + add_library(plssvm::STDPAR ALIAS plssvm::plssvm-stdpar) add_library(plssvm::stdpar ALIAS plssvm::plssvm-stdpar) + # set COMPONENT to be found set(plssvm_stdpar_FOUND ON) + if (NOT plssvm_FIND_QUIETLY) + if 
(plssvm_FIND_OPTIONAL_stdpar) + message(STATUS "Found optional component \"stdpar\".") + else () + message(STATUS "Found component \"stdpar\".") + endif () + endif () else () # set COMPONENT to be NOT found set(plssvm_stdpar_FOUND OFF) + # PLSSVM is only not found if stdpar is NOT an optional component + if (NOT plssvm_FIND_OPTIONAL_stdpar) + set(plssvm_FOUND OFF) + endif () + + # if REQUIRED was set in the find_package call, fail + if (plssvm_FIND_REQUIRED AND NOT plssvm_FIND_OPTIONAL_stdpar) + set(plssvm_NOT_FOUND_MESSAGE "Couldn't find required component \"stdpar\".") + return() + elseif (NOT plssvm_FIND_QUIETLY) + if (plssvm_FIND_OPTIONAL_stdpar) + message(STATUS "Couldn't find optional component \"stdpar\".") + else () + message(STATUS "Couldn't find component \"stdpar\".") + endif () + endif () endif () diff --git a/cmake/presets/common.json b/cmake/presets/common.json index 82bbea9e9..c2b63f127 100644 --- a/cmake/presets/common.json +++ b/cmake/presets/common.json @@ -26,7 +26,7 @@ "hidden": true, "inherits": "common", "cacheVariables": { - "CMAKE_BUILD_TYPE": "RelWithDebInfo", + "CMAKE_BUILD_TYPE": "Release", "PLSSVM_ENABLE_FAST_MATH": "ON", "PLSSVM_ENABLE_ASSERTS": "OFF", "PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE": "OFF", @@ -43,8 +43,8 @@ "PLSSVM_ENABLE_ASSERTS": "ON", "PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE": "OFF", "PLSSVM_ENABLE_TESTING": "ON", - "PLSSVM_TEST_FILE_NUM_DATA_POINTS": "250", - "PLSSVM_TEST_FILE_NUM_FEATURES": "100", + "PLSSVM_TEST_FILE_NUM_DATA_POINTS": "25", + "PLSSVM_TEST_FILE_NUM_FEATURES": "10", "PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES": "ON" } } diff --git a/cmake/presets/dpcpp.json b/cmake/presets/dpcpp.json index 5d4287767..d8d4efda4 100644 --- a/cmake/presets/dpcpp.json +++ b/cmake/presets/dpcpp.json @@ -4,31 +4,27 @@ "configurePresets": [ { "name": "dpcpp", - "displayName": "DPC++/icpx SYCL backend", + "displayName": "DPC++ SYCL backend", "inherits": "build", "cacheVariables": { "CMAKE_CXX_COMPILER": "clang++", "PLSSVM_ENABLE_SYCL_BACKEND": 
"ON", "PLSSVM_ENABLE_SYCL_ADAPTIVECPP_BACKEND": "OFF", "PLSSVM_ENABLE_SYCL_DPCPP_BACKEND": "ON", - "PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT": "ON", "PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO": "ON", - "PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_USE_HIP": "ON", "PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION": "dpcpp" } }, { "name": "dpcpp_python", - "displayName": "DPC++/icpx backend + Python bindings", + "displayName": "DPC++ backend + Python bindings", "inherits": "build", "cacheVariables": { "CMAKE_CXX_COMPILER": "clang++", "PLSSVM_ENABLE_SYCL_BACKEND": "ON", "PLSSVM_ENABLE_SYCL_ADAPTIVECPP_BACKEND": "OFF", "PLSSVM_ENABLE_SYCL_DPCPP_BACKEND": "ON", - "PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT": "ON", "PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO": "ON", - "PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_USE_HIP": "ON", "PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION": "dpcpp", "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON", "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON" @@ -36,16 +32,14 @@ }, { "name": "dpcpp_test", - "displayName": "DPC++/icpx backend tests", + "displayName": "DPC++ backend tests", "inherits": "test", "cacheVariables": { "CMAKE_CXX_COMPILER": "clang++", "PLSSVM_ENABLE_SYCL_BACKEND": "ON", "PLSSVM_ENABLE_SYCL_ADAPTIVECPP_BACKEND": "OFF", "PLSSVM_ENABLE_SYCL_DPCPP_BACKEND": "ON", - "PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT": "ON", "PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO": "ON", - "PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_USE_HIP": "ON", "PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION": "dpcpp" } } @@ -53,21 +47,21 @@ "buildPresets": [ { "name": "dpcpp", - "displayName": "DPC++/icpx SYCL backend", + "displayName": "DPC++ SYCL backend", "configurePreset": "dpcpp", "configuration": "RelWithDebInfo", "inherits": "common" }, { "name": "dpcpp_python", - "displayName": "DPC++/icpx backend + Python bindings", + "displayName": "DPC++ backend + Python bindings", "configurePreset": "dpcpp_python", "configuration": "RelWithDebInfo", "inherits": "common" }, { "name": "dpcpp_test", - "displayName": "DPC++/icpx SYCL backend 
tests", + "displayName": "DPC++ SYCL backend tests", "configurePreset": "dpcpp_test", "configuration": "Debug", "inherits": "common" @@ -76,13 +70,13 @@ "testPresets": [ { "name": "dpcpp_test", - "displayName": "DPC++/icpx SYCL backend all tests", + "displayName": "DPC++ SYCL backend all tests", "configurePreset": "dpcpp_test", "inherits": "common" }, { "name": "dpcpp_backend_test", - "displayName": "DPC++/icpx SYCL backend specific tests", + "displayName": "DPC++ SYCL backend specific tests", "configurePreset": "dpcpp_test", "inherits": "common", "filter": { @@ -95,7 +89,7 @@ "workflowPresets": [ { "name": "dpcpp", - "displayName": "DPC++/icpx SYCL backend workflow", + "displayName": "DPC++ SYCL backend workflow", "steps": [ { "name": "dpcpp", @@ -109,7 +103,7 @@ }, { "name": "dpcpp_python", - "displayName": "DPC++/icpx SYCL backend + Python bindings workflow", + "displayName": "DPC++ SYCL backend + Python bindings workflow", "steps": [ { "name": "dpcpp_python", @@ -123,7 +117,7 @@ }, { "name": "dpcpp_test", - "displayName": "DPC++/icpx SYCL test workflow", + "displayName": "DPC++ SYCL test workflow", "steps": [ { "name": "dpcpp_test", @@ -141,7 +135,7 @@ }, { "name": "dpcpp_backend_test", - "displayName": "DPC++/icpx SYCL backend test workflow", + "displayName": "DPC++ SYCL backend test workflow", "steps": [ { "name": "dpcpp_test", diff --git a/cmake/presets/icpx.json b/cmake/presets/icpx.json new file mode 100644 index 000000000..ae509b5cf --- /dev/null +++ b/cmake/presets/icpx.json @@ -0,0 +1,155 @@ +{ + "version": 6, + "include": ["common.json"], + "configurePresets": [ + { + "name": "icpx", + "displayName": "icpx SYCL backend", + "inherits": "build", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "icpx", + "PLSSVM_ENABLE_SYCL_BACKEND": "ON", + "PLSSVM_ENABLE_SYCL_ADAPTIVECPP_BACKEND": "OFF", + "PLSSVM_ENABLE_SYCL_DPCPP_BACKEND": "ON", + "PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO": "ON", + "PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION": "dpcpp" + } + }, + { + 
"name": "icpx_python", + "displayName": "icpx backend + Python bindings", + "inherits": "build", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "icpx", + "PLSSVM_ENABLE_SYCL_BACKEND": "ON", + "PLSSVM_ENABLE_SYCL_ADAPTIVECPP_BACKEND": "OFF", + "PLSSVM_ENABLE_SYCL_DPCPP_BACKEND": "ON", + "PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO": "ON", + "PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION": "dpcpp", + "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON", + "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON" + } + }, + { + "name": "icpx_test", + "displayName": "icpx backend tests", + "inherits": "test", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "icpx", + "PLSSVM_ENABLE_SYCL_BACKEND": "ON", + "PLSSVM_ENABLE_SYCL_ADAPTIVECPP_BACKEND": "OFF", + "PLSSVM_ENABLE_SYCL_DPCPP_BACKEND": "ON", + "PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO": "ON", + "PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION": "dpcpp" + } + } + ], + "buildPresets": [ + { + "name": "icpx", + "displayName": "icpx SYCL backend", + "configurePreset": "icpx", + "configuration": "RelWithDebInfo", + "inherits": "common" + }, + { + "name": "icpx_python", + "displayName": "icpx backend + Python bindings", + "configurePreset": "icpx_python", + "configuration": "RelWithDebInfo", + "inherits": "common" + }, + { + "name": "icpx_test", + "displayName": "icpx SYCL backend tests", + "configurePreset": "icpx_test", + "configuration": "Debug", + "inherits": "common" + } + ], + "testPresets": [ + { + "name": "icpx_test", + "displayName": "icpx SYCL backend all tests", + "configurePreset": "icpx_test", + "inherits": "common" + }, + { + "name": "icpx_backend_test", + "displayName": "icpx SYCL backend specific tests", + "configurePreset": "icpx_test", + "inherits": "common", + "filter": { + "include": { + "name": "DPCPP.*" + } + } + } + ], + "workflowPresets": [ + { + "name": "icpx", + "displayName": "icpx SYCL backend workflow", + "steps": [ + { + "name": "icpx", + "type": "configure" + }, + { + "name": "icpx", + "type": "build" + } + ] + }, + { + "name": 
"icpx_python", + "displayName": "icpx SYCL backend + Python bindings workflow", + "steps": [ + { + "name": "icpx_python", + "type": "configure" + }, + { + "name": "icpx_python", + "type": "build" + } + ] + }, + { + "name": "icpx_test", + "displayName": "icpx SYCL test workflow", + "steps": [ + { + "name": "icpx_test", + "type": "configure" + }, + { + "name": "icpx_test", + "type": "build" + }, + { + "name": "icpx_test", + "type": "test" + } + ] + }, + { + "name": "icpx_backend_test", + "displayName": "icpx SYCL backend test workflow", + "steps": [ + { + "name": "icpx_test", + "type": "configure" + }, + { + "name": "icpx_test", + "type": "build" + }, + { + "name": "icpx_backend_test", + "type": "test" + } + ] + } + ] +} \ No newline at end of file diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 51656c84a..072b8873e 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -61,9 +61,11 @@ endif () # add doxygen as target doxygen_add_docs( - doc "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/docs/resources;${PROJECT_SOURCE_DIR}/README.md;${PROJECT_SOURCE_DIR}/bindings/Python/README.md" + doc + "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/docs/resources;${PROJECT_SOURCE_DIR}/README.md;${PROJECT_SOURCE_DIR}/bindings/Python/README.md;${PROJECT_SOURCE_DIR}/examples/python/interactive/README.md" ALL # add to the default build target - WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" COMMENT "Generating API documentation with Doxygen" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMENT "Generating API documentation with Doxygen" ) # create shortcut for index.html diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox index fd23efcbc..dcb4337d0 100644 --- a/docs/resources/dirs.dox +++ b/docs/resources/dirs.dox @@ -543,6 +543,16 @@ * @brief Directory containing implementation details for the SYCL backend using AdaptiveCpp as SYCL implementation. 
*/ +/** + * @dir include/plssvm/data_set + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementations for the different data sets. + */ /** * @dir include/plssvm/detail @@ -577,6 +587,17 @@ * @brief Directory containing implementation details regarding the IO functionality which **should not** be used by users. */ +/** + * @dir include/plssvm/detail/logging + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementation details regarding the different logging functions which **should not** be used by users. + */ + /** * @dir include/plssvm/detail/tracking * @author Alexander Van Craen @@ -599,6 +620,50 @@ * @brief Directory containing custom exception types used to be able to better distinguish errors. */ +/** + * @dir include/plssvm/model + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementations for the different models. + */ + +/** + * @dir include/plssvm/mpi + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing the implementation regarding the MPI wrapper functionality. + */ + +/** + * @dir include/plssvm/mpi/detail + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementation details regarding the MPI wrapper functionality which **should not** be used by users. + */ + +/** + * @dir include/plssvm/svm + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementations for the different C-SVMs. 
+ */ + /** * @dir include/plssvm/version * @author Alexander Van Craen diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 09b3f3d14..69d15e95c 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -1,15 +1,22 @@ cmake_minimum_required(VERSION 3.16) -project(LibraryUsageExample LANGUAGES CXX) +project(LibraryUsageExamples LANGUAGES CXX) find_package(plssvm CONFIG REQUIRED) +# CMake's COMPONENTS mechanism can also be used if a specific library component is required, e.g.: find_package(plssvm REQUIRED COMPONENTS CUDA) # classification executable example add_executable(classification main_classification.cpp) -target_compile_features(classification PUBLIC cxx_std_17) -target_link_libraries(classification PUBLIC plssvm::plssvm-all) - +# classification executable example using MPI +add_executable(classification_mpi main_classification_mpi.cpp) # regression executable example add_executable(regression main_regression.cpp) -target_compile_features(regression PUBLIC cxx_std_17) -target_link_libraries(regression PUBLIC plssvm::plssvm-all) +# regression executable example using MPI +add_executable(regression_mpi main_regression_mpi.cpp) + +# link PLSSVM against executables +foreach (target classification classification_mpi regression regression_mpi) + target_compile_features(${target} PUBLIC cxx_std_17) + target_link_libraries(${target} PUBLIC plssvm::plssvm) + # can also only link against a single library component, e.g.: target_link_libraries(${target} PUBLIC plssvm::cuda) +endforeach () diff --git a/examples/cpp/main_classification.cpp b/examples/cpp/main_classification.cpp index aab4ab2ed..a6f6f5f4f 100644 --- a/examples/cpp/main_classification.cpp +++ b/examples/cpp/main_classification.cpp @@ -30,7 +30,8 @@ int main() { // predict the labels const std::vector predicted_label = svc->predict(model, test_data); // output a more complete classification report - const std::vector &correct_label = test_data.labels().value(); + 
const auto &labels_opt = test_data.labels(); // std::optional>> + const std::vector &correct_label = labels_opt.value().get(); std::cout << plssvm::classification_report{ correct_label, predicted_label } << std::endl; // write model file to disk diff --git a/examples/cpp/main_classification_mpi.cpp b/examples/cpp/main_classification_mpi.cpp new file mode 100644 index 000000000..8904ca545 --- /dev/null +++ b/examples/cpp/main_classification_mpi.cpp @@ -0,0 +1,57 @@ +#include "plssvm/core.hpp" + +#include +#include +#include + +int main() { + // correctly initialize and finalize environments + plssvm::environment::scope_guard environment_guard{}; + + // create PLSSVM MPI communicator + plssvm::mpi::communicator comm{}; + + try { + // create a new C-SVM parameter set, explicitly overriding the default kernel function + const plssvm::parameter params{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial }; + + // create two data sets: one with the training data scaled to [-1, 1] + // and one with the test data scaled like the training data + const plssvm::classification_data_set train_data{ comm, "train_file.libsvm", plssvm::min_max_scaler{ comm, -1.0, 1.0 } }; + const plssvm::classification_data_set test_data{ comm, "test_file.libsvm", train_data.scaling_factors()->get() }; + + // create C-SVC using the default backend and the previously defined parameter + const auto svc = plssvm::make_csvc(comm, params); + + // fit using the training data, (optionally) set the termination criterion + const plssvm::classification_model model = svc->fit(train_data, plssvm::epsilon = 1e-6); + + // note: be sure to output results only on main rank + // get accuracy of the trained model + const double model_accuracy = svc->score(model); + if (comm.is_main_rank()) { + std::cout << "model accuracy: " << model_accuracy << std::endl; + } + + // predict the labels + const std::vector predicted_label = svc->predict(model, test_data); + // output a more complete classification report 
+ const auto &labels_opt = test_data.labels(); // std::optional>> + const std::vector &correct_label = labels_opt.value().get(); + if (comm.is_main_rank()) { + std::cout << plssvm::classification_report{ correct_label, predicted_label } << std::endl; + } + + // write model file to disk + if (comm.is_main_rank()) { + model.save("model_file.libsvm"); + } + + } catch (const plssvm::exception &e) { + std::cerr << e.what_with_loc() << std::endl; + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + } + + return 0; +} diff --git a/examples/cpp/main_regression.cpp b/examples/cpp/main_regression.cpp index a9da4dc27..ffb821ba4 100644 --- a/examples/cpp/main_regression.cpp +++ b/examples/cpp/main_regression.cpp @@ -14,8 +14,8 @@ int main() { // create two data sets: one with the training data scaled to [-1, 1] // and one with the test data scaled like the training data - const plssvm::regression_data_set train_data{ "train_file.libsvm", { -1.0, 1.0 } }; - const plssvm::regression_data_set test_data{ "test_file.libsvm", train_data.scaling_factors()->get() }; + const plssvm::regression_data_set train_data{ "train_file_reg.libsvm", { -1.0, 1.0 } }; + const plssvm::regression_data_set test_data{ "test_file_reg.libsvm", train_data.scaling_factors()->get() }; // create C-SVR using the default backend and the previously defined parameter const auto svr = plssvm::make_csvr(params); @@ -26,7 +26,8 @@ int main() { // predict the labels const std::vector predicted_label = svr->predict(model, test_data); // output a more complete regression report - const std::vector &correct_label = test_data.labels().value(); + const auto &labels_opt = test_data.labels(); // std::optional>> + const std::vector &correct_label = labels_opt.value().get(); std::cout << plssvm::regression_report{ correct_label, predicted_label } << std::endl; // write model file to disk diff --git a/examples/cpp/main_regression_mpi.cpp b/examples/cpp/main_regression_mpi.cpp new file mode 100644 
index 000000000..5026dc7ec --- /dev/null +++ b/examples/cpp/main_regression_mpi.cpp @@ -0,0 +1,51 @@ +#include "plssvm/core.hpp" + +#include +#include +#include + +int main() { + // correctly initialize and finalize environments + plssvm::environment::scope_guard environment_guard{}; + + // create PLSSVM MPI communicator + plssvm::mpi::communicator comm{}; + + try { + // create a new C-SVM parameter set, explicitly overriding the default kernel function + const plssvm::parameter params{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial }; + + // create two data sets: one with the training data scaled to [-1, 1] + // and one with the test data scaled like the training data + const plssvm::regression_data_set train_data{ comm, "train_file_reg.libsvm", plssvm::min_max_scaler{ comm, -1.0, 1.0 } }; + const plssvm::regression_data_set test_data{ comm, "test_file_reg.libsvm", train_data.scaling_factors()->get() }; + + // create C-SVR using the default backend and the previously defined parameter + const auto svr = plssvm::make_csvr(comm, params); + + // fit using the training data, (optionally) set the termination criterion + const plssvm::regression_model model = svr->fit(train_data, plssvm::epsilon = 1e-6); + + // note: be sure to output results only on main rank + // predict the labels + const std::vector predicted_label = svr->predict(model, test_data); + // output a more complete regression report + const auto &labels_opt = test_data.labels(); // std::optional>> + const std::vector &correct_label = labels_opt.value().get(); + if (comm.is_main_rank()) { + std::cout << plssvm::regression_report{ correct_label, predicted_label } << std::endl; + } + + // write model file to disk + if (comm.is_main_rank()) { + model.save("model_file.libsvm"); + } + + } catch (const plssvm::exception &e) { + std::cerr << e.what_with_loc() << std::endl; + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + } + + return 0; +} diff --git 
a/examples/python/interactive/svc/svc.py b/examples/python/interactive/svc/svc.py index 684e6eb15..b30db0ce0 100644 --- a/examples/python/interactive/svc/svc.py +++ b/examples/python/interactive/svc/svc.py @@ -87,8 +87,8 @@ def __init__(self, model, X_train, y_train): def train_models(svm_params, X_train, y_train): """Train the SVMs and compute the decision boundaries.""" - # train plssvm.SVC - trained_plssvm_model = SVCModel(plssvm.SVC(**svm_params), X_train, y_train) + # train plssvm.svm.SVC + trained_plssvm_model = SVCModel(plssvm.svm.SVC(**svm_params), X_train, y_train) # if the laplacian kernel is selected, we can't train the sklearn model if svm_params["kernel"] == "laplacian": @@ -112,7 +112,7 @@ def train_models(svm_params, X_train, y_train): sklearn_classification_report, sklearn_classification_report_source = create_classification_report_plot(y, sklearn_pred) sklearn_text = Div(text=f"score: {sklearn_model.model.score(X, y) * 100:.2f}%
runtime: {sklearn_model.time:.2f}ms", styles={'font-size': '16px', 'color': 'black'}) - plssvm_decision_boundary_fig, plssvm_decision_boundary, plssvm_decision_boundary_source, plssvm_data_source = create_decision_boundary_plot("plssvm.SVC", + plssvm_decision_boundary_fig, plssvm_decision_boundary, plssvm_decision_boundary_source, plssvm_data_source = create_decision_boundary_plot("plssvm.svm.SVC", plssvm_model, X, y) plssvm_pred = plssvm_model.model.predict(X) plssvm_confusion_matrix, plssvm_confusion_matrix_source = create_confusion_matrix_plot(y, plssvm_pred) diff --git a/examples/python/interactive/svr/svr.py b/examples/python/interactive/svr/svr.py index cb3753aef..9a94b244d 100644 --- a/examples/python/interactive/svr/svr.py +++ b/examples/python/interactive/svr/svr.py @@ -66,8 +66,8 @@ def __init__(self, model, X_train, y_train): def train_models(svm_params, X_train, y_train): """Train the SVMs and compute the decision boundaries.""" - # train plssvm.SVR - trained_plssvm_model = SVRModel(plssvm.SVR(**svm_params), X_train, y_train) + # train plssvm.svm.SVR + trained_plssvm_model = SVRModel(plssvm.svm.SVR(**svm_params), X_train, y_train) # if the laplacian kernel is selected, we can't train the sklearn model if svm_params["kernel"] == "laplacian": @@ -90,7 +90,7 @@ def train_models(svm_params, X_train, y_train): sklearn_text = Div(text=f"score: {r2_score(y, sklearn_pred):.3f}
runtime: {sklearn_model.time:.2f}ms", styles={'font-size': '16px', 'color': 'black'}) - plssvm_fig, plssvm_plot, plssvm_plot_source, plssvm_data_source, plssvm_pred = create_plot("plssvm.SVR", plssvm_model, X, y) + plssvm_fig, plssvm_plot, plssvm_plot_source, plssvm_data_source, plssvm_pred = create_plot("plssvm.svm.SVR", plssvm_model, X, y) plssvm_prediction_vs_actual, plssvm_prediction_vs_actual_source, plssvm_prediction_vs_actual_bisector_source = create_prediction_vs_actual_plot(y, plssvm_pred) plssvm_regression_report, plssvm_regression_report_source = create_regression_report_plot(y, plssvm_pred) plssvm_text = Div(text=f"score: {r2_score(y, plssvm_pred):.3f}
runtime: {plssvm_model.time:.2f}ms", diff --git a/examples/python/main_classification.py b/examples/python/main_classification.py index 1de47a6ab..0e51003de 100644 --- a/examples/python/main_classification.py +++ b/examples/python/main_classification.py @@ -23,7 +23,7 @@ test_data = plssvm.ClassificationDataSet("test_file.libsvm", type=np.int32, scaler=train_data.scaling_factors()) # create C-SVC using the default backend and the previously defined parameter - svm = plssvm.CSVC(params) + svm = plssvm.CSVC(params=params) # fit using the training data, (optionally) set the termination criterion model = svm.fit(train_data, epsilon=1e-6) diff --git a/examples/python/main_classification_mpi.py b/examples/python/main_classification_mpi.py new file mode 100644 index 000000000..b42bb921a --- /dev/null +++ b/examples/python/main_classification_mpi.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +######################################################################################################################## +# Authors: Alexander Van Craen, Marcel Breyer # +# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved # +# License: This file is part of the PLSSVM project which is released under the MIT license. # +# See the LICENSE.md file in the project root for full license information. 
# +######################################################################################################################## + +import plssvm +from sklearn.metrics import classification_report +import numpy as np +from mpi4py import MPI +import sys + +try: + # create a new C-SVM parameter set, explicitly overriding the default kernel function + params = plssvm.Parameter(kernel_type=plssvm.KernelFunctionType.POLYNOMIAL) + + # create two data sets: one with the training data scaled to [-1, 1] + # and one with the test data scaled like the training data + train_data = plssvm.ClassificationDataSet("train_file.libsvm", type=np.int32, + scaler=plssvm.MinMaxScaler(-1.0, 1.0, comm=MPI.COMM_WORLD), + comm=MPI.COMM_WORLD) + test_data = plssvm.ClassificationDataSet("test_file.libsvm", type=np.int32, scaler=train_data.scaling_factors(), + comm=MPI.COMM_WORLD) + + # create C-SVC using the default backend and the previously defined parameter + svm = plssvm.CSVC(params=params, comm=MPI.COMM_WORLD) + + # fit using the training data, (optionally) set the termination criterion + model = svm.fit(train_data, epsilon=1e-6) + + # note: be sure to output results only on main rank + # get accuracy of the trained model + model_accuracy = svm.score(model) + if MPI.COMM_WORLD.rank == 0: + print("model accuracy: {}".format(model_accuracy)) + + # predict labels + predicted_label = svm.predict(model, test_data) + # output a more complete classification report + correct_label = test_data.labels() + if MPI.COMM_WORLD.rank == 0: + print(classification_report(correct_label, predicted_label)) + + # write model file to disk + if MPI.COMM_WORLD.rank == 0: + model.save("model_file.libsvm") +except plssvm.PLSSVMError as e: + print(e) + sys.exit(1) +except RuntimeError as e: + print(e) + sys.exit(1) diff --git a/examples/python/main_regression.py b/examples/python/main_regression.py index e1ddf0c23..d3fa88bc4 100644 --- a/examples/python/main_regression.py +++ b/examples/python/main_regression.py @@ -22,7 
+22,7 @@ test_data = plssvm.RegressionDataSet("test_file_reg.libsvm", scaler=train_data.scaling_factors()) # create C-SVR using the default backend and the previously defined parameter - svm = plssvm.CSVR(params) + svm = plssvm.CSVR(params=params) # fit using the training data, (optionally) set the termination criterion model = svm.fit(train_data, epsilon=1e-6) @@ -35,7 +35,6 @@ predicted_label = svm.predict(model, test_data) # output a more complete regression report correct_label = test_data.labels() - correct_label = [int(l) for l in correct_label] print(regression_report(correct_label, predicted_label)) # write model file to disk diff --git a/examples/python/main_regression_mpi.py b/examples/python/main_regression_mpi.py new file mode 100644 index 000000000..6fcfc430b --- /dev/null +++ b/examples/python/main_regression_mpi.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +######################################################################################################################## +# Authors: Alexander Van Craen, Marcel Breyer # +# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved # +# License: This file is part of the PLSSVM project which is released under the MIT license. # +# See the LICENSE.md file in the project root for full license information. 
# +######################################################################################################################## + +import plssvm +from plssvm import regression_report +from mpi4py import MPI +import sys + +try: + # create a new C-SVM parameter set, explicitly overriding the default kernel function + params = plssvm.Parameter(kernel_type=plssvm.KernelFunctionType.POLYNOMIAL) + + # create two data sets: one with the training data scaled to [-1, 1] + # and one with the test data scaled like the training data + train_data = plssvm.RegressionDataSet("train_file_reg.libsvm", scaler=plssvm.MinMaxScaler(-1.0, 1.0, comm=MPI.COMM_WORLD), comm=MPI.COMM_WORLD) + test_data = plssvm.RegressionDataSet("test_file_reg.libsvm", scaler=train_data.scaling_factors(), comm=MPI.COMM_WORLD) + + # create C-SVR using the default backend and the previously defined parameter + svm = plssvm.CSVR(params=params, comm=MPI.COMM_WORLD) + + # fit using the training data, (optionally) set the termination criterion + model = svm.fit(train_data, epsilon=1e-6) + + # note: be sure to output results only on main rank + # get accuracy of the trained model + model_accuracy = svm.score(model) + if MPI.COMM_WORLD.rank == 0: + print("model accuracy: {}".format(model_accuracy)) + + # predict labels + predicted_label = svm.predict(model, test_data) + # output a more complete regression report + correct_label = test_data.labels() + if MPI.COMM_WORLD.rank == 0: + print(regression_report(correct_label, predicted_label)) + + # write model file to disk + if MPI.COMM_WORLD.rank == 0: + model.save("model_file.libsvm") +except plssvm.PLSSVMError as e: + print(e) + sys.exit(1) +except RuntimeError as e: + print(e) + sys.exit(1) diff --git a/examples/python/sklearn/plot_classifier_comparison.py b/examples/python/sklearn/plot_classifier_comparison.py index 7b8f89f2c..85285e7b5 100644 --- a/examples/python/sklearn/plot_classifier_comparison.py +++ b/examples/python/sklearn/plot_classifier_comparison.py @@ -20,10 
+20,10 @@ ("Linear SVM (ovo)", sklearn.svm.SVC(kernel="linear", C=0.025, random_state=42, decision_function_shape='ovo')), ("RBF SVM (ovr)", sklearn.svm.SVC(gamma=2, C=1, random_state=42, decision_function_shape='ovr')), ("RBF SVM (ovo)", sklearn.svm.SVC(gamma=2, C=1, random_state=42, decision_function_shape='ovo')), - ("PLSSVM Linear (ovr)", plssvm.SVC(kernel="linear", decision_function_shape='ovr', C=0.025)), - ("PLSSVM Linear (ovo)", plssvm.SVC(kernel="linear", decision_function_shape='ovo', C=0.025)), - ("PLSSVM RBF (ovr)", plssvm.SVC(gamma=2, decision_function_shape='ovr', C=1)), - ("PLSSVM RBF (ovo)", plssvm.SVC(gamma=2, decision_function_shape='ovo', C=1)), + ("PLSSVM Linear (ovr)", plssvm.svm.SVC(kernel="linear", decision_function_shape='ovr', C=0.025)), + ("PLSSVM Linear (ovo)", plssvm.svm.SVC(kernel="linear", decision_function_shape='ovo', C=0.025)), + ("PLSSVM RBF (ovr)", plssvm.svm.SVC(gamma=2, decision_function_shape='ovr', C=1)), + ("PLSSVM RBF (ovo)", plssvm.svm.SVC(gamma=2, decision_function_shape='ovo', C=1)), ("Neural Net", MLPClassifier(alpha=1, max_iter=1000, random_state=42)), ] diff --git a/examples/python/sklearn/plot_decision_boundaries_via_coef_and_intercept.py b/examples/python/sklearn/plot_decision_boundaries_via_coef_and_intercept.py index 501d5a8dc..7be74cb84 100644 --- a/examples/python/sklearn/plot_decision_boundaries_via_coef_and_intercept.py +++ b/examples/python/sklearn/plot_decision_boundaries_via_coef_and_intercept.py @@ -45,7 +45,7 @@ def test(model, svc_name, axis): test(sklearn_model, "sklearn.svm.SVC", ax[0]) # train using PLSSVM -from plssvm import SVC +from plssvm.svm import SVC plssvm_model = SVC(kernel="linear", decision_function_shape="ovr") test(plssvm_model, "plssvm.SVC", ax[1]) diff --git a/examples/python/sklearn/plot_decision_boundary_confidence.py b/examples/python/sklearn/plot_decision_boundary_confidence.py index 3dc9c926c..0faf806c0 100644 --- a/examples/python/sklearn/plot_decision_boundary_confidence.py +++ 
b/examples/python/sklearn/plot_decision_boundary_confidence.py @@ -1,6 +1,6 @@ import numpy as np import matplotlib.pyplot as plt -import sklearn +import sklearn.svm import plssvm from sklearn.datasets import make_classification from sklearn.preprocessing import StandardScaler @@ -57,10 +57,10 @@ def create_plot(clf, axis): Z_confidence = Z_confidence.reshape(xx.shape) # plot the decision boundaries (class regions) - axis[ax_idx].pcolormesh(xx, yy, Z_pred, alpha=0.3) + axis[ax_idx].pcolormesh(xx, yy, Z_pred, alpha=0.3, shading="auto") # overlay confidence as a grayscale gradient - cb_values = axis[ax_idx].pcolormesh(xx, yy, Z_confidence, cmap="Greys", alpha=0.45) + cb_values = axis[ax_idx].pcolormesh(xx, yy, Z_confidence, cmap="Greys", alpha=0.45, shading="auto") fig.colorbar(cb_values, ax=axis[ax_idx], label="confidence") # plot training data points @@ -78,7 +78,7 @@ def create_plot(clf, axis): create_plot(sklearn_svc, ax[0, :]) # fit PLSSVM -plssvm_svc = plssvm.SVC(kernel='rbf', C=10) +plssvm_svc = plssvm.svm.SVC(kernel='rbf', C=10) create_plot(plssvm_svc, ax[1, :]) plt.tight_layout() diff --git a/examples/python/sklearn/plot_different_classifiers.py b/examples/python/sklearn/plot_different_classifiers.py index e9f70642d..e86b0efa4 100644 --- a/examples/python/sklearn/plot_different_classifiers.py +++ b/examples/python/sklearn/plot_different_classifiers.py @@ -1,6 +1,6 @@ import matplotlib.pyplot as plt -import plssvm as svm +from plssvm import svm from sklearn import datasets from sklearn.inspection import DecisionBoundaryDisplay diff --git a/examples/python/sklearn/plot_digits_classification.py b/examples/python/sklearn/plot_digits_classification.py index ee52c1f9b..6a91d7483 100644 --- a/examples/python/sklearn/plot_digits_classification.py +++ b/examples/python/sklearn/plot_digits_classification.py @@ -17,7 +17,7 @@ # Import datasets, classifiers and performance metrics from sklearn import datasets, metrics from sklearn.model_selection import train_test_split 
-import plssvm as svm +from plssvm import svm ############################################################################### # Digits dataset diff --git a/examples/python/sklearn/plot_face_recognition.py b/examples/python/sklearn/plot_face_recognition.py index 2f6f61d3b..4b72d0b5d 100644 --- a/examples/python/sklearn/plot_face_recognition.py +++ b/examples/python/sklearn/plot_face_recognition.py @@ -23,7 +23,7 @@ from sklearn.metrics import ConfusionMatrixDisplay, classification_report from sklearn.model_selection import RandomizedSearchCV, train_test_split from sklearn.preprocessing import StandardScaler -from plssvm import SVC +from plssvm.svm import SVC # %% # Download the data, if not already on disk and load it as numpy arrays diff --git a/examples/python/sklearn/plot_feature_discretization.py b/examples/python/sklearn/plot_feature_discretization.py index e6d48df7a..4aa696b1e 100644 --- a/examples/python/sklearn/plot_feature_discretization.py +++ b/examples/python/sklearn/plot_feature_discretization.py @@ -12,7 +12,7 @@ from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import KBinsDiscretizer, StandardScaler -from plssvm import SVC +from plssvm.svm import SVC from sklearn.utils._testing import ignore_warnings h = 0.02 # step size in the mesh diff --git a/examples/python/sklearn/plot_rbf_parameters.py b/examples/python/sklearn/plot_rbf_parameters.py index b45ebd193..1060dffcd 100644 --- a/examples/python/sklearn/plot_rbf_parameters.py +++ b/examples/python/sklearn/plot_rbf_parameters.py @@ -139,7 +139,7 @@ def __call__(self, value, clip=None): # tuning can be achieved but at a much higher cost. 
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit -from sklearn.svm import SVC +from plssvm.svm import SVC C_range = np.logspace(-2, 10, 13) gamma_range = np.logspace(-9, 3, 13) diff --git a/examples/python/sklearn/plot_rbf_parameters_3_classes.py b/examples/python/sklearn/plot_rbf_parameters_3_classes.py index f46ee3849..cf56b3c00 100644 --- a/examples/python/sklearn/plot_rbf_parameters_3_classes.py +++ b/examples/python/sklearn/plot_rbf_parameters_3_classes.py @@ -138,7 +138,7 @@ def __call__(self, value, clip=None): # tuning can be achieved but at a much higher cost. from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit -from plssvm import SVC +from plssvm.svm import SVC C_range = np.logspace(-2, 10, 13) gamma_range = np.logspace(-9, 3, 13) diff --git a/examples/python/sklearn/plot_separating_hyperplane.py b/examples/python/sklearn/plot_separating_hyperplane.py index 6c64eb04e..7e59ad519 100644 --- a/examples/python/sklearn/plot_separating_hyperplane.py +++ b/examples/python/sklearn/plot_separating_hyperplane.py @@ -14,7 +14,7 @@ import matplotlib.pyplot as plt -import plssvm as svm +from plssvm import svm from sklearn.datasets import make_blobs from sklearn.inspection import DecisionBoundaryDisplay diff --git a/examples/python/sklearn/plot_svm_anova.py b/examples/python/sklearn/plot_svm_anova.py index efa32fcae..637318d6e 100644 --- a/examples/python/sklearn/plot_svm_anova.py +++ b/examples/python/sklearn/plot_svm_anova.py @@ -32,7 +32,7 @@ from sklearn.feature_selection import SelectPercentile, f_classif from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from plssvm import SVC +from plssvm.svm import SVC # Create a feature-selection transform, a scaler and an instance of SVM that we # combine to have a full-blown estimator diff --git a/examples/python/sklearn/plot_svm_kernels.py b/examples/python/sklearn/plot_svm_kernels.py index 1217506be..518dce797 100644 --- 
a/examples/python/sklearn/plot_svm_kernels.py +++ b/examples/python/sklearn/plot_svm_kernels.py @@ -106,7 +106,7 @@ # Finally, the support vectors used during training (which always lay on the # margins) are identified by means of the `support_vectors_` attribute of # the trained SVCs, and plotted as well. -import plssvm as svm +from plssvm import svm from sklearn.inspection import DecisionBoundaryDisplay import matplotlib as mpl diff --git a/examples/python/sklearn/plot_svm_margin.py b/examples/python/sklearn/plot_svm_margin.py index fbd17e903..8b44c07d7 100644 --- a/examples/python/sklearn/plot_svm_margin.py +++ b/examples/python/sklearn/plot_svm_margin.py @@ -19,7 +19,7 @@ import matplotlib.pyplot as plt import numpy as np -import plssvm as svm +from plssvm import svm # we create 40 separable points np.random.seed(0) diff --git a/examples/python/sklearn/plot_svm_regression.py b/examples/python/sklearn/plot_svm_regression.py index 3141ac166..2f1d707a6 100644 --- a/examples/python/sklearn/plot_svm_regression.py +++ b/examples/python/sklearn/plot_svm_regression.py @@ -13,7 +13,7 @@ import matplotlib.pyplot as plt import numpy as np -from plssvm import SVR +from plssvm.svm import SVR def one_div_x(data): diff --git a/examples/python/sklearn/real_world/plot_SVHN.py b/examples/python/sklearn/real_world/plot_SVHN.py index 5dbbc9376..742806415 100644 --- a/examples/python/sklearn/real_world/plot_SVHN.py +++ b/examples/python/sklearn/real_world/plot_SVHN.py @@ -1,5 +1,5 @@ import matplotlib.pyplot as plt -from plssvm import SVC +from plssvm.svm import SVC import seaborn as sns from sklearn.metrics import accuracy_score, classification_report, confusion_matrix from sklearn.datasets import load_svmlight_file diff --git a/examples/python/sklearn/real_world/plot_california_housing.py b/examples/python/sklearn/real_world/plot_california_housing.py index 74a621076..873131433 100644 --- a/examples/python/sklearn/real_world/plot_california_housing.py +++ 
b/examples/python/sklearn/real_world/plot_california_housing.py @@ -4,7 +4,7 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from plssvm import SVR -import plssvm +import plssvm.svm from sklearn.metrics import mean_squared_error, r2_score import time diff --git a/examples/python/sklearn/real_world/plot_fashion_MNIST.py b/examples/python/sklearn/real_world/plot_fashion_MNIST.py index 9c013823a..0d1cd4cb8 100644 --- a/examples/python/sklearn/real_world/plot_fashion_MNIST.py +++ b/examples/python/sklearn/real_world/plot_fashion_MNIST.py @@ -4,7 +4,7 @@ from tensorflow.keras.datasets import fashion_mnist from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler -from plssvm import SVC +from plssvm.svm import SVC from sklearn.metrics import accuracy_score, classification_report, confusion_matrix import time diff --git a/examples/python/sklearn_like_svc.py b/examples/python/sklearn_like_svc.py index 59e67d8db..e9d8934c3 100644 --- a/examples/python/sklearn_like_svc.py +++ b/examples/python/sklearn_like_svc.py @@ -13,7 +13,7 @@ import sklearn.metrics import sklearn.inspection import numpy as np -from plssvm import SVC +from plssvm.svm import SVC # load the breast cancer datasets cancer = sklearn.datasets.load_breast_cancer() diff --git a/examples/python/sklearn_like_svr.py b/examples/python/sklearn_like_svr.py index 792ec8af7..e44d83253 100644 --- a/examples/python/sklearn_like_svr.py +++ b/examples/python/sklearn_like_svr.py @@ -36,7 +36,7 @@ plt.plot(X, y_rbf_sklearn, lw=2, linestyle='dashed', label='RBF model sklearn') # fit the PLSSVM regression model -from plssvm import SVR +from plssvm.svm import SVR plssvm_svr_lin = SVR(kernel='linear', C=100) y_lin_plssvm = plssvm_svr_lin.fit(X, y).predict(X) diff --git a/include/plssvm/backends/CUDA/csvm.hpp b/include/plssvm/backends/CUDA/csvm.hpp index 204f1b339..ec02c80c0 100644 --- a/include/plssvm/backends/CUDA/csvm.hpp +++ 
b/include/plssvm/backends/CUDA/csvm.hpp @@ -20,6 +20,7 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::detail::csvm_backend_exists @@ -152,17 +153,38 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::cuda::csvm{} { } + /** + * @brief Construct a new C-SVC using the CUDA backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::cuda::csvm{} { } + + /** + * @brief Construct a new C-SVC using the CUDA backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::cuda::csvm{ target } { } + /** * @brief Construct a new C-SVC using the CUDA backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvc(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::cuda::csvm{ target } { } /** @@ -172,7 +194,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::cuda::csvm{} { } + + /** + * @brief Construct a new C-SVC using the CUDA backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::cuda::csvm{} { } /** @@ -183,7 +216,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::cuda::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the CUDA backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::cuda::csvm{ target } { } }; @@ -200,7 +245,17 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::cuda::csvm{} { } + + /** + * @brief Construct a new C-SVR using the CUDA backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + explicit csvr(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::cuda::csvm{} { } /** @@ -210,7 +265,18 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::cuda::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the CUDA backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + explicit csvr(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::cuda::csvm{ target } { } /** @@ -220,7 +286,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::cuda::csvm{} { } + + /** + * @brief Construct a new C-SVR using the CUDA backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::cuda::csvm{} { } /** @@ -231,7 +308,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::cuda::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the CUDA backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::cuda::csvm{ target } { } }; diff --git a/include/plssvm/backends/HIP/csvm.hpp b/include/plssvm/backends/HIP/csvm.hpp index c85cca654..e1f64e58e 100644 --- a/include/plssvm/backends/HIP/csvm.hpp +++ b/include/plssvm/backends/HIP/csvm.hpp @@ -20,6 +20,7 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::detail::csvm_backend_exists @@ -152,17 +153,38 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::hip::csvm{} { } + /** + * @brief Construct a new C-SVC using the HIP backend with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::hip::csvm{} { } + + /** + * @brief Construct a new C-SVC using the HIP backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::hip::csvm{ target } { } + /** * @brief Construct a new C-SVC using the HIP backend on the @p target platform with the parameters given through @p params. * @param[in] target the target platform used for this C-SVC + * @param[in] comm the used MPI communicator * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvc(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::hip::csvm{ target } { } /** @@ -172,7 +194,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hip::csvm{} { } + + /** + * @brief Construct a new C-SVC using the HIP backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hip::csvm{} { } /** @@ -183,7 +216,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hip::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the HIP backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hip::csvm{ target } { } }; @@ -200,17 +245,38 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::hip::csvm{} { } + /** + * @brief Construct a new C-SVR using the HIP backend with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + explicit csvr(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::hip::csvm{} { } + + /** + * @brief Construct a new C-SVR using the HIP backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::hip::csvm{ target } { } + /** * @brief Construct a new C-SVR using the HIP backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvr(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::hip::csvm{ target } { } /** @@ -220,7 +286,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hip::csvm{} { } + + /** + * @brief Construct a new C-SVR using the HIP backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hip::csvm{} { } /** @@ -231,7 +308,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hip::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the HIP backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... 
}, ::plssvm::hip::csvm{ target } { } }; diff --git a/include/plssvm/backends/HPX/csvm.hpp b/include/plssvm/backends/HPX/csvm.hpp index b6d9013a5..37afd8638 100644 --- a/include/plssvm/backends/HPX/csvm.hpp +++ b/include/plssvm/backends/HPX/csvm.hpp @@ -19,6 +19,7 @@ #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/svm/csvc.hpp" // plssvm::csvc @@ -133,7 +134,17 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::hpx::csvm{} { } + + /** + * @brief Construct a new C-SVC using the HPX backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::hpx::csvm{} { } /** @@ -143,7 +154,18 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::hpx::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the HPX backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + explicit csvc(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::hpx::csvm{ target } { } /** @@ -153,7 +175,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hpx::csvm{} { } + + /** + * @brief Construct a new C-SVC using the HPX backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hpx::csvm{} { } /** @@ -164,7 +197,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hpx::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the HPX backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hpx::csvm{ target } { } }; @@ -181,17 +226,38 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::hpx::csvm{} { } + /** + * @brief Construct a new C-SVR using the HPX backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::hpx::csvm{} { } + + /** + * @brief Construct a new C-SVR using the HPX backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::hpx::csvm{ target } { } + /** * @brief Construct a new C-SVR using the HPX backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvr(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::hpx::csvm{ target } { } /** @@ -201,7 +267,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hpx::csvm{} { } + + /** + * @brief Construct a new C-SVR using the HPX backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hpx::csvm{} { } /** @@ -212,7 +289,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::hpx::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the HPX backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::hpx::csvm{ target } { } }; diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp index 3fcdb04d0..4d7c412cf 100644 --- a/include/plssvm/backends/HPX/detail/utility.hpp +++ b/include/plssvm/backends/HPX/detail/utility.hpp @@ -15,7 +15,8 @@ #pragma once #include "boost/atomic/atomic_ref.hpp" // boost::atomic_ref -#include // std::string + +#include // std::string namespace plssvm::hpx::detail { diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp index 09f6e6358..20cbad247 100644 --- a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp +++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp @@ -19,13 +19,14 @@ #include "plssvm/matrix.hpp" // plssvm::soa_matrix #include "plssvm/shape.hpp" // plssvm::shape -#include // std::array -#include // std::ceil -#include // std::size_t -#include // hpx::execution::par_unseq -#include // hpx::for_each -#include // std::iota -#include // std::vector +#include "hpx/execution.hpp" // hpx::execution::par_unseq +#include "hpx/parallel/segmented_algorithms/for_each.hpp" // hpx::for_each + +#include // std::array +#include // std::ceil +#include // std::size_t +#include // std::iota +#include // std::vector namespace plssvm::hpx::detail { @@ -33,33 +34,37 @@ namespace plssvm::hpx::detail { * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a symmetric matrix (memory optimized), @p B and @p C are matrices, and @p alpha and @p 
beta are scalars. * @param[in] num_rows the number of rows in @p A and @p C * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for * @param[in] alpha the scalar alpha value * @param[in] A the matrix @p A * @param[in] B the matrix @p B * @param[in] beta the scalar beta value * @param[in,out] C the matrix @p C, also used as result matrix */ -inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { - PLSSVM_ASSERT(A.size() == (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2, "A matrix sizes mismatch!: {} != {}", A.size(), (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2); +inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { + PLSSVM_ASSERT(!A.empty(), "A matrix may not be empty!"); PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows); PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(num_rows >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, num_rows); + PLSSVM_ASSERT(num_rows >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, num_rows); // calculate constants const auto blocked_num_rhs = static_cast(std::ceil(static_cast(num_rhs) / INTERNAL_BLOCK_SIZE)); - const auto 
blocked_num_rows = static_cast(std::ceil(static_cast(num_rows) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over which should be iterated - std::vector range(blocked_num_rhs * blocked_num_rows); // define range over which should be iterated + std::vector range(blocked_num_rhs * blocked_device_specific_num_rows); // define range over which should be iterated std::iota(range.begin(), range.end(), 0); - ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) { + ::hpx::for_each(::hpx::execution::par_unseq, range.cbegin(), range.cend(), [&](const std::size_t idx) { // calculate the indices used in the current thread - const std::size_t rhs = idx / blocked_num_rows; - const std::size_t row = idx % blocked_num_rows; + const std::size_t rhs = idx / blocked_device_specific_num_rows; + const std::size_t row = idx % blocked_device_specific_num_rows; const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz; const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; @@ -68,21 +73,96 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num std::array, INTERNAL_BLOCK_SIZE> temp{}; // iterate over all features - for (std::size_t dim = 0; dim < num_rows; ++dim) { + for (std::size_t dim = 0; dim < (num_rows - row_offset); ++dim) { // perform the dot product calculation for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { - const std::size_t global_i = rhs_idx + static_cast(internal_i); - const std::size_t global_j = row_idx + static_cast(internal_j); + const std::size_t 
global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t global_row = row_idx + static_cast(internal_j); real_type A_val = 0.0; // determine on which side of the diagonal we are located - if (dim < global_j) { - A_val = A.data()[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }]; + if (dim < global_row) { + A_val = A[dim * (num_rows - row_offset + PADDING_SIZE_uz) + global_row - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }]; } else { - A_val = A.data()[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + A_val = A[global_row * (num_rows - row_offset + PADDING_SIZE_uz) + dim - global_row * (global_row + std::size_t{ 1 }) / std::size_t{ 2 }]; } - temp[internal_i][internal_j] += A_val * B.data()[dim * (num_rhs + PADDING_SIZE_uz) + global_i]; + temp[internal_i][internal_j] += A_val * B(global_rhs, dim + row_offset); + } + } + } + + // apply the (partial) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t device_global_row = row_idx + static_cast(internal_j); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_rhs < num_rhs && device_global_row < device_specific_num_rows) { + C(global_rhs, global_row) = alpha * temp[internal_i][internal_j] + beta * C(global_rhs, global_row); + } + } + } + }); +} + +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. 
+ * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] num_mirror_rows the number of rows to mirror down + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; thr rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + */ +inline void device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { + // compute: C = alpha * A * B + beta * C with A in m x k, B in n x k, and C in n x m, alpha, beta as scalar + PLSSVM_ASSERT(!A.empty(), "A matrix may not be empty!"); + PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(num_rows >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, num_rows); + PLSSVM_ASSERT(num_rows >= num_mirror_rows, "The number of mirror rows ({}) cannot be greater the the total number of rows ({})!", num_mirror_rows, num_rows); + PLSSVM_ASSERT(num_rows >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, num_rows); + + // calculate constants + const auto blocked_num_rhs = static_cast(std::ceil(static_cast(num_rhs) / 
INTERNAL_BLOCK_SIZE)); + const auto blocked_num_mirror_rows = static_cast(std::ceil(static_cast(num_mirror_rows) / INTERNAL_BLOCK_SIZE)); + + // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); + const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); + + // define range over which should be iterated + std::vector range(blocked_num_rhs * blocked_num_mirror_rows); // define range over which should be iterated + std::iota(range.begin(), range.end(), 0); + + ::hpx::for_each(::hpx::execution::par_unseq, range.cbegin(), range.cend(), [&](const std::size_t idx) { + // calculate the indices used in the current thread + const std::size_t rhs = idx / blocked_num_mirror_rows; + const std::size_t row = idx % blocked_num_mirror_rows; + + const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz; + const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; + + // create a thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for (std::size_t dim = 0; dim < device_specific_num_rows; ++dim) { + // perform the dot product calculation + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t global_row = row_idx + static_cast(internal_j); + + const real_type A_val = A[dim * (num_rows - row_offset + PADDING_SIZE_uz) - (dim - std::size_t{ 1 }) * dim / std::size_t{ 2 } + device_specific_num_rows - dim + global_row]; + temp[internal_i][internal_j] += A_val * B(global_rhs, row_offset + dim); } } } @@ -90,12 +170,13 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num // apply the (partial) BLAS operation and update C for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) 
{ for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { - const std::size_t global_i = rhs_idx + static_cast(internal_i); - const std::size_t global_j = row_idx + static_cast(internal_j); + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t partial_global_row = row_idx + static_cast(internal_j); + const std::size_t global_row = row_offset + device_specific_num_rows + row_idx + static_cast(internal_j); // be sure to not perform out of bounds accesses - if (global_i < num_rhs && global_j < num_rows) { - C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i]; + if (global_rhs < num_rhs && partial_global_row < num_mirror_rows) { + C(global_rhs, global_row) = alpha * temp[internal_i][internal_j] + beta * C(global_rhs, global_row); } } } diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp index 2e59bf078..e575c6af2 100644 --- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -20,13 +20,14 @@ #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix -#include // std::array -#include // std::ceil, std::sqrt -#include // std::size_t -#include // hpx::execution::par_unseq -#include // hpx::for_each -#include // std::iota -#include // std::vector +#include "hpx/execution.hpp" // hpx::execution::par_unseq +#include "hpx/parallel/segmented_algorithms/for_each.hpp" // hpx::for_each + +#include // std::array +#include // std::ceil, std::sqrt +#include // std::size_t +#include // std::iota +#include // std::vector namespace plssvm::hpx::detail { @@ -34,72 +35,80 @@ namespace plssvm::hpx::detail { * @brief Assemble the kernel 
matrix using the @p kernel function. * @tparam kernel the compile-time kernel function to use * @tparam Args the types of the potential additional arguments for the @p kernel function - * @param[in] q the `q` vector * @param[out] kernel_matrix the resulting kernel matrix * @param[in] data the data matrix + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for + * @param[in] q the `q` vector * @param[in] QA_cost he bottom right matrix entry multiplied by cost * @param[in] cost 1 / the cost parameter in the C-SVM * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function */ template -void device_kernel_assembly(const std::vector &q, std::vector &kernel_matrix, const soa_matrix &data, const real_type QA_cost, const real_type cost, Args... kernel_function_parameter) { +void device_kernel_assembly(std::vector &kernel_matrix, const soa_matrix &data, const std::size_t device_specific_num_rows, const std::size_t row_offset, const std::vector &q, const real_type QA_cost, const real_type cost, Args... 
kernel_function_parameter) { PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1); - PLSSVM_ASSERT(kernel_matrix.size() == (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2, "Sizes mismatch (SYMM)!: {} != {}", kernel_matrix.size(), (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2); + PLSSVM_ASSERT(!kernel_matrix.empty(), "A matrix may not be empty!"); + PLSSVM_ASSERT(q.size() >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, q.size()); + PLSSVM_ASSERT(q.size() >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, q.size()); PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!"); - const std::size_t dept = q.size(); - const auto blocked_dept = static_cast(std::ceil(static_cast(dept) / INTERNAL_BLOCK_SIZE)); + // calculate constants + const std::size_t num_rows = data.num_rows() - 1; const std::size_t num_features = data.num_cols(); + const auto blocked_row_range = static_cast(std::ceil(static_cast(num_rows - row_offset) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); - // define range over which should be iterated - std::vector range(blocked_dept * (blocked_dept + 1) / 2); - std::iota(range.begin(), range.end(), 0); + // count the number of entries in the final index list + std::vector indices(blocked_row_range * blocked_device_specific_num_rows); // define range over which should be iterated + std::iota(indices.begin(), indices.end(), 0); - ::hpx::for_each(::hpx::execution::par_unseq, 
range.begin(), range.end(), [&](const std::size_t idx) { + ::hpx::for_each(::hpx::execution::par_unseq, indices.cbegin(), indices.cend(), [&](const std::size_t idx) { // calculate the indices used in the current thread - const std::size_t col = static_cast(static_cast(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1)); - const std::size_t row = static_cast(0.5 * static_cast(2 * (idx - col * blocked_dept) + col * col + col)); - - const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; - const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz; - - // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs - // create a thread private array used for internal caching - std::array, INTERNAL_BLOCK_SIZE> temp{}; - - // iterate over all features - for (std::size_t dim = 0; dim < num_features; ++dim) { - // perform the feature reduction calculation - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - temp[internal_row][internal_col] += detail::feature_reduce(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]); + const std::size_t row_idx = (idx / blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + const std::size_t col_idx = (idx % blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + + // only calculate the upper triangular matrix + if (row_idx >= col_idx) { + // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs + // create a thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for (std::size_t dim = 0; dim < 
num_features; ++dim) { + // perform the feature reduction calculation + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + temp[internal_row][internal_col] += detail::feature_reduce(data(global_row, dim), data(global_col, dim)); + } } } - } - // apply the remaining part of the kernel function and store the value in the output kernel matrix - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - // calculate the indices to access the kernel matrix (the part stored on the current device) - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) - if (global_row < dept && global_col < dept && global_row >= global_col) { - real_type temp_ij = temp[internal_row][internal_col]; - temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q[global_row] - q[global_col]; - // apply the cost on the diagonal - if (global_row == global_col) { - temp_ij += cost; + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + // calculate the indices to access the kernel matrix (the part stored on the current device) + const std::size_t device_global_row = row_idx + static_cast(internal_row); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t device_global_col = col_idx + static_cast(internal_col); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if (device_global_row < (num_rows - row_offset) && device_global_col < device_specific_num_rows && global_row >= global_col) { + real_type temp_ij = temp[internal_row][internal_col]; + temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q[global_row] - q[global_col]; + // apply the cost on the diagonal + if (global_row == global_col) { + temp_ij += cost; + } + kernel_matrix[device_global_col * (num_rows - row_offset + PADDING_SIZE_uz) - device_global_col * (device_global_col + std::size_t{ 1 }) / std::size_t{ 2 } + device_global_row] = temp_ij; } - kernel_matrix[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij; } } } diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp index eef6b809d..06df89dac 100644 --- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp +++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -18,18 +18,18 @@ #include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/operators.hpp" // overloaded arithmetic operations for a plssvm::matrix #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/kernel_functions.hpp" // plssvm::kernel_function #include "plssvm/matrix.hpp" // aos_matrix -#include // std::array -#include // std::ceil -#include // std::size_t, std::sqrt -#include // hpx::execution::par_unseq -#include // hpx::for_each -#include // std::iota -#include // std::vector +#include "hpx/execution.hpp" // hpx::execution::par_unseq +#include "hpx/parallel/segmented_algorithms/for_each.hpp" // hpx::for_each + +#include // std::array +#include // std::ceil +#include // std::size_t, std::sqrt +#include // std::iota +#include // std::vector namespace plssvm::hpx::detail { @@ -40,86 +40,86 @@ namespace plssvm::hpx::detail { * @param[in] alpha the scalar alpha value * @param[in] q the 
`q` vector * @param[in] data the data matrix + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for * @param[in] QA_cost he bottom right matrix entry multiplied by cost * @param[in] cost 1 / the cost parameter in the C-SVM * @param[in] B the matrix @p B - * @param[in] beta the beta alpha value * @param[in,out] C the matrix @p C * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function */ template -inline void device_kernel_assembly_symm(const real_type alpha, const std::vector &q, const soa_matrix &data, const real_type QA_cost, const real_type cost, const soa_matrix &B, const real_type beta, soa_matrix &C, Args... kernel_function_parameter) { +inline void device_kernel_assembly_symm(const real_type alpha, const std::vector &q, const soa_matrix &data, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type QA_cost, const real_type cost, const soa_matrix &B, soa_matrix &C, Args... 
kernel_function_parameter) { PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1); + PLSSVM_ASSERT(q.size() >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, q.size()); + PLSSVM_ASSERT(q.size() >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, q.size()); PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!"); PLSSVM_ASSERT(B.shape() == C.shape(), "The matrices B and C must have the same shape!"); PLSSVM_ASSERT(B.num_cols() == q.size(), "The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), q.size()); - using namespace operators; - - // alpha * A * B + beta * C - C *= beta; - // calculate constants - const std::size_t dept = q.size(); - const auto blocked_dept = static_cast(std::ceil(static_cast(dept) / INTERNAL_BLOCK_SIZE)); + const std::size_t num_rows = data.num_rows() - 1; const std::size_t num_features = data.num_cols(); const std::size_t num_classes = B.num_rows(); + const auto blocked_row_range = static_cast(std::ceil(static_cast(num_rows - row_offset) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); - const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); - // define range over which should be iterated - std::vector range(blocked_dept * (blocked_dept + 1) / 2); - std::iota(range.begin(), range.end(), 0); + // count the number of entries in the final index list + std::vector indices(blocked_row_range * blocked_device_specific_num_rows); // define range over which should be iterated + std::iota(indices.begin(), 
indices.end(), 0); - ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) { + ::hpx::for_each(::hpx::execution::par_unseq, indices.cbegin(), indices.cend(), [&](const std::size_t idx) { // calculate the indices used in the current thread - const std::size_t col = static_cast(static_cast(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1)); - const std::size_t row = static_cast(0.5 * static_cast(2 * (idx - col * blocked_dept) + col * col + col)); - - const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; - const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz; - - // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs - // create a thread private array used for internal caching - std::array, INTERNAL_BLOCK_SIZE> temp{}; - - // iterate over all features - for (std::size_t dim = 0; dim < num_features; ++dim) { - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - temp[internal_row][internal_col] += detail::feature_reduce(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]); + const std::size_t row_idx = (idx / blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + const std::size_t col_idx = (idx % blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + + // only calculate the upper triangular matrix + if (row_idx >= col_idx) { + // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs + // create a thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for 
(std::size_t dim = 0; dim < num_features; ++dim) { + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + temp[internal_row][internal_col] += detail::feature_reduce(data(global_row, dim), data(global_col, dim)); + } } } - } - // apply the remaining part of the kernel function and store the value in the output kernel matrix - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) - if (global_row < dept && global_col < dept && global_row >= global_col) { - real_type temp_ij = temp[internal_row][internal_col]; - temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q[global_row] - q[global_col]; - // apply the cost on the diagonal - if (global_row == global_col) { - temp_ij += cost; - // calculate the values of alpha * A * B - for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - atomic_ref{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx]; - } - } else { - // calculate the values of alpha * A * B - for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - atomic_ref{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx]; - // symmetry - atomic_ref{ C.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx]; + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + const std::size_t device_global_row = row_idx + static_cast(internal_row); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t device_global_col = col_idx + static_cast(internal_col); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if (device_global_row < (num_rows - row_offset) && device_global_col < device_specific_num_rows && global_row >= global_col) { + real_type temp_ij = temp[internal_row][internal_col]; + temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q[global_row] - q[global_col]; + // apply the cost on the diagonal + if (global_row == global_col) { + temp_ij += cost; + // calculate the values of alpha * A * B + for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { + atomic_ref{ C(class_idx, global_row) } += alpha * temp_ij * B(class_idx, global_row); + } + } else { + // calculate the values of alpha * A * B + for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { + atomic_ref{ C(class_idx, global_row) } += alpha * temp_ij * B(class_idx, global_col); + // symmetry + atomic_ref{ C(class_idx, global_col) } += alpha * temp_ij * B(class_idx, global_row); + } } } } diff --git a/include/plssvm/backends/HPX/kernel/kernel_functions.hpp b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp index b7be1cb16..f007343bc 100644 --- a/include/plssvm/backends/HPX/kernel/kernel_functions.hpp +++ b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp @@ -17,8 +17,6 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#define PLSSVM_HPX_KERNEL_FUNCTION - #include // std::abs, std::pow, std::exp, std::tanh #include // std::numeric_limits::min @@ -35,7 +33,7 @@ namespace plssvm::hpx::detail { * @return the reduced value (`[[nodiscard]]`) */ template -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { return val1 * val2; } @@ -46,7 +44,7 @@ template * @return the reduced value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { const real_type d = val1 - val2; return d * d; } @@ -58,7 +56,7 @@ template <> * @return the reduced value (`[[nodiscard]]`) */ 
template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { return std::abs(val1 - val2); } @@ -70,7 +68,7 @@ template <> * @return the reduced value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { const real_type d = val1 - val2; return (real_type{ 1.0 } / (val1 + val2 + std::numeric_limits::min())) * d * d; } @@ -84,7 +82,7 @@ template <> * @return the result value (`[[nodiscard]]`) */ template -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(real_type, Args...); +[[nodiscard]] inline real_type apply_kernel_function(real_type, Args...); /** * @brief Compute the linear kernel function using @p value. 
@@ -92,7 +90,7 @@ template * @return the result value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value) { +[[nodiscard]] inline real_type apply_kernel_function(const real_type value) { return value; } @@ -105,7 +103,7 @@ template <> * @return the result value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const int degree, const real_type gamma, const real_type coef0) { +[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const int degree, const real_type gamma, const real_type coef0) { return std::pow(gamma * value + coef0, (real_type) degree); } @@ -116,7 +114,7 @@ template <> * @return the result value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma) { +[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const real_type gamma) { return std::exp(-gamma * value); } @@ -128,7 +126,7 @@ template <> * @return the result value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma, const real_type coef0) { +[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const real_type gamma, const real_type coef0) { return std::tanh(gamma * value + coef0); } @@ -139,7 +137,7 @@ template <> * @return the result value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma) { +[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const real_type gamma) { return std::exp(-gamma * value); } @@ -150,7 +148,7 @@ template <> * @return the result value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline 
PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma) { +[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const real_type gamma) { return std::exp(-gamma * value); } diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp index 7b153d889..7ea68e172 100644 --- a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp +++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp @@ -22,13 +22,14 @@ #include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix #include "plssvm/shape.hpp" // plssvm::shape -#include // std::array -#include // std::fma -#include // std::size_t -#include // hpx::execution::par_unseq -#include // hpx::for_each -#include // std::iota -#include // std::vector +#include "hpx/execution.hpp" // hpx::execution::par_unseq +#include "hpx/parallel/segmented_algorithms/for_each.hpp" // hpx::for_each + +#include // std::array +#include // std::fma +#include // std::size_t +#include // std::iota +#include // std::vector namespace plssvm::hpx::detail { @@ -37,27 +38,29 @@ namespace plssvm::hpx::detail { * @param[out] w the vector to speedup the linear prediction * @param[in] alpha the previously learned weights * @param[in] support_vectors the support vectors + * @param[in] device_specific_num_sv the number of support vectors the current device is responsible for + * @param[in] sv_offset the first row in @p support_vectors the current device is responsible for */ -inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &alpha, const soa_matrix &support_vectors) { +inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &alpha, const soa_matrix &support_vectors, const std::size_t device_specific_num_sv, const std::size_t sv_offset) { PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows()); 
PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() })); + PLSSVM_ASSERT(support_vectors.num_rows() >= device_specific_num_sv, "The number of place specific sv ({}) cannot be greater the the total number of sv ({})!", device_specific_num_sv, support_vectors.num_rows()); + PLSSVM_ASSERT(support_vectors.num_rows() >= sv_offset, "The sv offset ({}) cannot be greater the the total number of sv ({})!", sv_offset, support_vectors.num_rows()); // calculate constants - const std::size_t num_features = support_vectors.num_cols(); - const auto blocked_num_features = static_cast(std::ceil(static_cast(num_features) / INTERNAL_BLOCK_SIZE)); const std::size_t num_classes = alpha.num_rows(); const auto blocked_num_classes = static_cast(std::ceil(static_cast(num_classes) / INTERNAL_BLOCK_SIZE)); - const std::size_t num_support_vectors = support_vectors.num_rows(); + const std::size_t num_features = support_vectors.num_cols(); + const auto blocked_num_features = static_cast(std::ceil(static_cast(num_features) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); - const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over which should be iterated - std::vector range(blocked_num_features * blocked_num_classes); + std::vector range(blocked_num_classes * blocked_num_features); std::iota(range.begin(), range.end(), 0); - ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) { + ::hpx::for_each(::hpx::execution::par_unseq, range.cbegin(), range.cend(), [&](const std::size_t idx) { // calculate the indices used in the current thread const std::size_t feature = idx / blocked_num_classes; const std::size_t c = idx % blocked_num_classes; @@ -69,14 +72,14 @@ inline 
void device_kernel_w_linear(soa_matrix &w, const aos_matrix, INTERNAL_BLOCK_SIZE> temp{}; // iterate over all features - for (std::size_t sv = 0; sv < num_support_vectors; ++sv) { + for (std::size_t sv = 0; sv < device_specific_num_sv; ++sv) { // perform the feature reduction calculation for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) { for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { const std::size_t global_feature_idx = feature_idx + static_cast(internal_feature); const std::size_t global_class_idx = class_idx + static_cast(internal_class); - temp[internal_feature][internal_class] += alpha.data()[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * support_vectors.data()[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv]; + temp[internal_feature][internal_class] += alpha(global_class_idx, sv_offset + sv) * support_vectors(sv_offset + sv, global_feature_idx); } } } @@ -87,7 +90,7 @@ inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix(internal_feature); const std::size_t global_class_idx = class_idx + static_cast(internal_class); - w.data()[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class]; + w(global_class_idx, global_feature_idx) = temp[internal_feature][internal_class]; } } }); @@ -99,28 +102,30 @@ inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &prediction, const soa_matrix &w, const std::vector &rho, const soa_matrix &predict_points) { +inline void device_kernel_predict_linear(aos_matrix &prediction, const soa_matrix &w, const std::vector &rho, const soa_matrix &predict_points, const std::size_t device_specific_num_predict_points, const std::size_t row_offset) { PLSSVM_ASSERT(w.num_rows() == rho.size(), "Size mismatch: {} vs {}!", w.num_rows(), rho.size()); PLSSVM_ASSERT(w.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", 
w.num_cols(), predict_points.num_cols()); PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() })); + PLSSVM_ASSERT(predict_points.num_rows() >= device_specific_num_predict_points, "The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", device_specific_num_predict_points, predict_points.num_rows()); + PLSSVM_ASSERT(predict_points.num_rows() >= row_offset, "The row offset ({}) cannot be greater the the total number of predict points ({})!", row_offset, predict_points.num_rows()); // calculate constants - const std::size_t num_predict_points = predict_points.num_rows(); - const auto blocked_num_predict_points = static_cast(std::ceil(static_cast(num_predict_points) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_predict_points = static_cast(std::ceil(static_cast(device_specific_num_predict_points) / INTERNAL_BLOCK_SIZE)); const std::size_t num_classes = prediction.num_cols(); const auto blocked_num_classes = static_cast(std::ceil(static_cast(num_classes) / INTERNAL_BLOCK_SIZE)); const std::size_t num_features = predict_points.num_cols(); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); - const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over which should be iterated - std::vector range(blocked_num_predict_points * blocked_num_classes); + std::vector range(blocked_device_specific_num_predict_points * blocked_num_classes); std::iota(range.begin(), range.end(), 0); - ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) { + ::hpx::for_each(::hpx::execution::par_unseq, range.cbegin(), range.cend(), [&](const std::size_t idx) { // calculate the indices used in the current thread 
const std::size_t pp = idx / blocked_num_classes; const std::size_t c = idx % blocked_num_classes; @@ -136,10 +141,10 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons // perform the feature reduction calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_class_idx = class_idx + static_cast(internal_class); - temp[internal_pp][internal_class] += w.data()[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]; + temp[internal_pp][internal_class] += w(global_class_idx, dim) * predict_points(global_pp_idx, dim); } } } @@ -147,11 +152,12 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons // perform the dot product calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t device_global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_class_idx = class_idx + static_cast(internal_class); - if (global_pp_idx < num_predict_points && global_class_idx < num_classes) { - prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho.data()[global_class_idx]; + if (device_global_pp_idx < device_specific_num_predict_points && global_class_idx < num_classes) { + prediction(global_pp_idx, global_class_idx) = temp[internal_pp][internal_class] - 
rho[global_class_idx]; } } } @@ -167,32 +173,34 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons * @param[in] rho the previously learned bias * @param[in] support_vectors the support vectors * @param[in] predict_points the data points to predict + * @param[in] device_specific_num_predict_points the number of predict points the current device is responsible for + * @param[in] row_offset the first row in @p predict_points the current device is responsible for * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function */ template -inline void device_kernel_predict(aos_matrix &prediction, const aos_matrix &alpha, const std::vector &rho, const soa_matrix &support_vectors, const soa_matrix &predict_points, Args... kernel_function_parameter) { +inline void device_kernel_predict(aos_matrix &prediction, const aos_matrix &alpha, const std::vector &rho, const soa_matrix &support_vectors, const soa_matrix &predict_points, const std::size_t device_specific_num_predict_points, const std::size_t row_offset, Args... 
kernel_function_parameter) { PLSSVM_ASSERT(alpha.num_rows() == rho.size(), "Size mismatch: {} vs {}!", alpha.num_rows(), rho.size()); PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows()); PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", support_vectors.num_cols(), predict_points.num_cols()); PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() })); + PLSSVM_ASSERT(predict_points.num_rows() >= device_specific_num_predict_points, "The number of place specific predict points ({}) cannot be greater than the total number of predict points ({})!", device_specific_num_predict_points, predict_points.num_rows()); + PLSSVM_ASSERT(predict_points.num_rows() >= row_offset, "The row offset ({}) cannot be greater than the total number of predict points ({})!", row_offset, predict_points.num_rows()); // calculate constants const std::size_t num_classes = alpha.num_rows(); const std::size_t num_support_vectors = support_vectors.num_rows(); const auto blocked_num_support_vectors = static_cast(std::ceil(static_cast(num_support_vectors) / INTERNAL_BLOCK_SIZE)); - const std::size_t num_predict_points = predict_points.num_rows(); - const auto blocked_num_predict_points = static_cast(std::ceil(static_cast(num_predict_points) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_predict_points = static_cast(std::ceil(static_cast(device_specific_num_predict_points) / INTERNAL_BLOCK_SIZE)); const std::size_t num_features = predict_points.num_cols(); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); - const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over which should be
iterated - std::vector range(blocked_num_predict_points * blocked_num_support_vectors); + std::vector range(blocked_device_specific_num_predict_points * blocked_num_support_vectors); std::iota(range.begin(), range.end(), 0); - ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) { + ::hpx::for_each(::hpx::execution::par_unseq, range.cbegin(), range.cend(), [&](const std::size_t idx) { // calculate the indices used in the current thread const std::size_t pp = idx / blocked_num_support_vectors; const std::size_t sv = idx % blocked_num_support_vectors; @@ -208,11 +216,11 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m // perform the feature reduction calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_sv_idx = sv_idx + static_cast(internal_sv); - temp[internal_pp][internal_sv] += detail::feature_reduce(support_vectors.data()[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx], - predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]); + temp[internal_pp][internal_sv] += detail::feature_reduce(support_vectors(global_sv_idx, dim), + predict_points(global_pp_idx, dim)); } } } @@ -228,16 +236,17 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m for (std::size_t a = 0; a < num_classes; ++a) { for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t device_global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = 
row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_sv_idx = sv_idx + static_cast(internal_sv); // be sure to not perform out of bounds accesses - if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) { + if (device_global_pp_idx < device_specific_num_predict_points && global_sv_idx < num_support_vectors) { if (global_sv_idx == 0) { - atomic_ref{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho.data()[a]; + atomic_ref{ prediction(global_pp_idx, a) } += -rho[a]; } - atomic_ref{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += - temp[internal_pp][internal_sv] * alpha.data()[a * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx]; + atomic_ref{ prediction(global_pp_idx, a) } += + temp[internal_pp][internal_sv] * alpha(a, global_sv_idx); } } } diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp index 4172183d7..5a77ef1e1 100644 --- a/include/plssvm/backends/Kokkos/csvm.hpp +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -23,6 +23,7 @@ #include "plssvm/detail/igor_utility.hpp" // plssvm::detail::get_value_from_named_parameter #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::{has_only_kokkos_parameter_named_args_v, has_only_kokkos_named_args_v} #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::detail::csvm_backend_exists @@ -187,19 +188,44 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const parameter params, Args &&...named_kokkos_args) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_kokkos_args)...) 
{ } + /** + * @brief Construct a new C-SVC using the Kokkos backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @param[in] named_kokkos_args the additional optional Kokkos specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_kokkos_args)...) { } + + /** + * @brief Construct a new C-SVC using the Kokkos backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @param[in] named_kokkos_args the additional optional Kokkos specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(const target_platform target, const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::kokkos::csvm(target, std::forward(named_kokkos_args)...) { } + /** * @brief Construct a new C-SVC using the Kokkos backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @param[in] named_kokkos_args the additional optional Kokkos specific named arguments * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvc(const target_platform target, const parameter params, Args &&...named_kokkos_args) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::kokkos::csvm(target, std::forward(named_kokkos_args)...) { } /** @@ -209,7 +235,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVC using the Kokkos backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_args)...) { } /** @@ -220,7 +257,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::kokkos::csvm(target, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVC using the Kokkos backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::kokkos::csvm(target, std::forward(named_args)...) { } }; @@ -238,20 +287,45 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvr(parameter params, Args &&...named_kokkos_args) : - ::plssvm::csvm{ params }, + explicit csvr(const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_kokkos_args)...) { } + /** + * @brief Construct a new C-SVR using the Kokkos backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @param[in] named_kokkos_args the additional optional Kokkos specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_kokkos_args)...) { } + + /** + * @brief Construct a new C-SVR using the Kokkos backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @param[in] named_kokkos_args the additional optional Kokkos specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(const target_platform target, const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::kokkos::csvm(target, std::forward(named_kokkos_args)...) { } + /** * @brief Construct a new C-SVR using the Kokkos backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @param[in] named_kokkos_args the additional optional Kokkos specific named arguments * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvr(target_platform target, parameter params, Args &&...named_kokkos_args) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params, Args &&...named_kokkos_args) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::kokkos::csvm(target, std::forward(named_kokkos_args)...) { } /** @@ -261,7 +335,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVR using the Kokkos backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::kokkos::csvm(target_platform::automatic, std::forward(named_args)...) { } /** @@ -272,7 +357,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::kokkos::csvm(target, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVR using the Kokkos backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::kokkos::csvm(target, std::forward(named_args)...) 
{ } }; diff --git a/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp index da0aaf755..0ded898c0 100644 --- a/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp @@ -15,6 +15,7 @@ #include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include // std::array @@ -190,9 +191,10 @@ class device_wrapper { * @brief Get a list of all available devices in the execution @p space that are supported by the @p target platform. * @param[in] space the Kokkos::ExecutionSpace to retrieve the devices from * @param[in] target the target platform that must be supported + * @param[in] comm the used MPI communicator * @return all devices for the @p target in the Kokkos::ExecutionSpace @p space (`[[nodiscard]]`) */ -[[nodiscard]] std::vector get_device_list(execution_space space, target_platform target); +[[nodiscard]] std::vector get_device_list(execution_space space, target_platform target, const mpi::communicator &comm); } // namespace plssvm::kokkos::detail diff --git a/include/plssvm/backends/OpenCL/csvm.hpp b/include/plssvm/backends/OpenCL/csvm.hpp index 30b3020a5..f52ec29cd 100644 --- a/include/plssvm/backends/OpenCL/csvm.hpp +++ b/include/plssvm/backends/OpenCL/csvm.hpp @@ -22,6 +22,7 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of +#include 
"plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::detail::csvm_backend_exists @@ -158,17 +159,38 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::opencl::csvm{} { } + /** + * @brief Construct a new C-SVC using the OpenCL backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::opencl::csvm{} { } + + /** + * @brief Construct a new C-SVC using the OpenCL backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::opencl::csvm{ target } { } + /** * @brief Construct a new C-SVC using the OpenCL backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvc(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::opencl::csvm{ target } { } /** @@ -178,7 +200,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::opencl::csvm{} { } + + /** + * @brief Construct a new C-SVC using the OpenCL backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::opencl::csvm{} { } /** @@ -189,7 +222,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::opencl::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the OpenCL backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::opencl::csvm{ target } { } }; @@ -206,17 +251,38 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::opencl::csvm{} { } + /** + * @brief Construct a new C-SVR using the OpenCL backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::opencl::csvm{} { } + + /** + * @brief Construct a new C-SVR using the OpenCL backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::opencl::csvm{ target } { } + /** * @brief Construct a new C-SVR using the OpenCL backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvr(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::opencl::csvm{ target } { } /** @@ -226,7 +292,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::opencl::csvm{} { } + + /** + * @brief Construct a new C-SVR using the OpenCL backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::opencl::csvm{} { } /** @@ -237,7 +314,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::opencl::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the OpenCL backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::opencl::csvm{ target } { } }; diff --git a/include/plssvm/backends/OpenCL/detail/jit_info.hpp b/include/plssvm/backends/OpenCL/detail/jit_info.hpp new file mode 100644 index 000000000..edd425c96 --- /dev/null +++ b/include/plssvm/backends/OpenCL/detail/jit_info.hpp @@ -0,0 +1,74 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief A simple struct encapsulating JIT compilation information. + */ + +#ifndef PLSSVM_BACKENDS_OPENCL_DETAIL_JIT_INFO_HPP_ +#define PLSSVM_BACKENDS_OPENCL_DETAIL_JIT_INFO_HPP_ + +#include "fmt/base.h" // fmt::formatter +#include "fmt/ostream.h" // fmt::ostream_formatter + +#include // std::chrono::milliseconds +#include // forward declare std::ostream and std::istream +#include // std::string + +namespace plssvm::opencl::detail { + +/** + * @brief A struct encapsulating information regarding the current jit compilation. + */ +struct jit_info { + /** + * @brief An enumeration describing the state of the kernel cache. + */ + enum class caching_status { + /// The kernel cache was successful and could be used. + success, + /// No kernel cache for the current kernel versions found. JIT compile kernels again. + error_no_cached_files, + /// The number of cached files is wrong. JIT compile kernels again. 
+ error_invalid_number_of_cached_files, + }; + + /// `true` if inline PTX for the atomicAdd implementation on NVIDIA GPUs is used. + bool use_ptx_inline{ false }; + /// The state of the kernel cache. + caching_status cache_state{ caching_status::success }; + /// The kernel cache dir. + std::string cache_dir{}; + /// The duration of the JIT compilation. + std::chrono::milliseconds duration{}; +}; + +/** + * @brief Output the @p status to the given output-stream @p out. + * @param[in,out] out the output-stream to write the JIT cache status type to + * @param[in] status the JIT cache status + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, jit_info::caching_status status); + +/** + * @brief Create a JIT report from @p info to output if more than one MPI rank is active. + * @param[in] info the JIT compilation information + * @return the report string (`[[nodiscard]]`) + */ +[[nodiscard]] std::string create_jit_report(const jit_info &info); + +} // namespace plssvm::opencl::detail + +/// @cond Doxygen_suppress + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond + +#endif // PLSSVM_BACKENDS_OPENCL_DETAIL_JIT_INFO_HPP_ diff --git a/include/plssvm/backends/OpenCL/detail/utility.hpp b/include/plssvm/backends/OpenCL/detail/utility.hpp index 5e58435f3..f2d30947c 100644 --- a/include/plssvm/backends/OpenCL/detail/utility.hpp +++ b/include/plssvm/backends/OpenCL/detail/utility.hpp @@ -17,10 +17,12 @@ #include "plssvm/backends/OpenCL/detail/command_queue.hpp" // plssvm::opencl::detail::command_queue #include "plssvm/backends/OpenCL/detail/context.hpp" // plssvm::opencl::detail::context #include "plssvm/backends/OpenCL/detail/error_code.hpp" // plssvm::opencl::detail::error_code +#include "plssvm/backends/OpenCL/detail/jit_info.hpp" // plssvm::opencl::detail::jit_info #include "plssvm/backends/OpenCL/detail/kernel.hpp" // plssvm::opencl::detail::compute_kernel_name #include "plssvm/backends/OpenCL/exceptions.hpp" // 
plssvm::opencl::backend_exception #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "CL/cl.h" // cl_uint, cl_int, clSetKernelArg, clEnqueueNDRangeKernel, clFinish @@ -122,13 +124,13 @@ void device_synchronize(const command_queue &queue); * Additionally, adds the path to the currently used OpenCL library as a comment to the kernel source string (before the checksum calculation) to detect * changes in the used OpenCL implementation and trigger a kernel rebuild. * + * @param[in] comm the MPI communicator * @param[in] contexts the used OpenCL contexts * @param[in] kernel_function the kernel function - * @param[in] kernel_names all kernel name for which an OpenCL cl_kernel should be build * @throws plssvm::invalid_file_format_exception if the file couldn't be read using [`std::ifstream::read`](https://en.cppreference.com/w/cpp/io/basic_istream/read) - * @return the command queues with all necessary kernels (`[[nodiscard]]`) + * @return [the command queues with all necessary kernels; information regarding the JIT compilation] (`[[nodiscard]]`) */ -[[nodiscard]] std::vector create_command_queues(const std::vector &contexts, kernel_function_type kernel_function, const std::vector> &kernel_names); +[[nodiscard]] std::pair, jit_info> create_command_queues(const mpi::communicator &comm, const std::vector &contexts, kernel_function_type kernel_function); /** * @brief Set all arguments in the parameter pack @p args for the kernel @p kernel. 
diff --git a/include/plssvm/backends/OpenMP/csvm.hpp b/include/plssvm/backends/OpenMP/csvm.hpp index 4fed44f0e..6dd02835f 100644 --- a/include/plssvm/backends/OpenMP/csvm.hpp +++ b/include/plssvm/backends/OpenMP/csvm.hpp @@ -18,6 +18,7 @@ #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/svm/csvc.hpp" // plssvm::csvc @@ -132,17 +133,38 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::openmp::csvm{} { } + /** + * @brief Construct a new C-SVC using the OpenMP backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + explicit csvc(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::openmp::csvm{} { } + + /** + * @brief Construct a new C-SVC using the OpenMP backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::openmp::csvm{ target } { } + /** * @brief Construct a new C-SVC using the OpenMP backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvc(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::openmp::csvm{ target } { } /** @@ -152,7 +174,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::openmp::csvm{} { } + + /** + * @brief Construct a new C-SVC using the OpenMP backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::openmp::csvm{} { } /** @@ -163,7 +196,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... 
}, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::openmp::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the OpenMP backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::openmp::csvm{ target } { } }; @@ -180,17 +225,38 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::openmp::csvm{} { } + /** + * @brief Construct a new C-SVR using the OpenMP backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::openmp::csvm{} { } + + /** + * @brief Construct a new C-SVR using the OpenMP backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::openmp::csvm{ target } { } + /** * @brief Construct a new C-SVR using the OpenMP backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvr(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::openmp::csvm{ target } { } /** @@ -200,7 +266,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::openmp::csvm{} { } + + /** + * @brief Construct a new C-SVR using the OpenMP backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::openmp::csvm{} { } /** @@ -211,7 +288,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... 
}, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::openmp::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the OpenMP backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::openmp::csvm{ target } { } }; diff --git a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp index 61729d9b8..e1041024a 100644 --- a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp +++ b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp @@ -29,21 +29,25 @@ namespace plssvm::openmp::detail { * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. 
* @param[in] num_rows the number of rows and columns in @p A * @param[in] num_rhs the number of rows in @p B and @p C + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for * @param[in] alpha the scalar alpha value * @param[in] A the matrix @p A * @param[in] B the matrix @p B * @param[in] beta the scalar beta value * @param[in,out] C the matrix @p C, also used as result matrix */ -inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { +inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { // compute: C = alpha * A * B + beta * C with A in m x k, B in n x k, and C in n x m, alpha, beta as scalar - PLSSVM_ASSERT(A.size() == (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2, "A matrix sizes mismatch!: {} != {}", A.size(), (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2); + PLSSVM_ASSERT(!A.empty(), "A matrix may not be empty!"); PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows); PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(num_rows >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, num_rows); + PLSSVM_ASSERT(num_rows >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, num_rows); // calculate constants const auto blocked_num_rhs = 
static_cast(std::ceil(static_cast(num_rhs) / INTERNAL_BLOCK_SIZE)); - const auto blocked_num_rows = static_cast(std::ceil(static_cast(num_rows) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); @@ -52,7 +56,7 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num #pragma omp parallel for collapse(2) for (std::size_t rhs = 0; rhs < blocked_num_rhs; rhs += THREAD_BLOCK_SIZE_uz) { - for (std::size_t row = 0; row < blocked_num_rows; row += THREAD_BLOCK_SIZE_uz) { + for (std::size_t row = 0; row < blocked_device_specific_num_rows; row += THREAD_BLOCK_SIZE_uz) { // perform operations on the current block for (std::size_t rhs_block = 0; rhs_block < THREAD_BLOCK_SIZE_uz; ++rhs_block) { for (std::size_t row_block = 0; row_block < THREAD_BLOCK_SIZE_uz; ++row_block) { @@ -64,21 +68,21 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num std::array, INTERNAL_BLOCK_SIZE> temp{}; // iterate over all features - for (std::size_t dim = 0; dim < num_rows; ++dim) { + for (std::size_t dim = 0; dim < (num_rows - row_offset); ++dim) { // perform the dot product calculation for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { - const std::size_t global_i = rhs_idx + static_cast(internal_i); - const std::size_t global_j = row_idx + static_cast(internal_j); + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t global_row = row_idx + static_cast(internal_j); real_type A_val = 0.0; // determine on which side of the diagonal we are located - if (dim < global_j) { - A_val = A[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + 
std::size_t{ 1 }) / std::size_t{ 2 }]; + if (dim < global_row) { + A_val = A[dim * (num_rows - row_offset + PADDING_SIZE_uz) + global_row - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }]; } else { - A_val = A[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + A_val = A[global_row * (num_rows - row_offset + PADDING_SIZE_uz) + dim - global_row * (global_row + std::size_t{ 1 }) / std::size_t{ 2 }]; } - temp[internal_i][internal_j] += A_val * B(global_i, dim); + temp[internal_i][internal_j] += A_val * B(global_rhs, dim + row_offset); } } } @@ -86,12 +90,90 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num // apply the (partial) BLAS operation and update C for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { - const std::size_t global_i = rhs_idx + static_cast(internal_i); - const std::size_t global_j = row_idx + static_cast(internal_j); + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t device_global_row = row_idx + static_cast(internal_j); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_j); // be sure to not perform out of bounds accesses - if (global_i < num_rhs && global_j < num_rows) { - C(global_i, global_j) = alpha * temp[internal_i][internal_j] + beta * C(global_i, global_j); + if (global_rhs < num_rhs && device_global_row < device_specific_num_rows) { + C(global_rhs, global_row) = alpha * temp[internal_i][internal_j] + beta * C(global_rhs, global_row); + } + } + } + } + } + } + } +} + +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. 
+ * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] num_mirror_rows the number of rows to mirror down + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; thr rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + */ +inline void device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { + // compute: C = alpha * A * B + beta * C with A in m x k, B in n x k, and C in n x m, alpha, beta as scalar + PLSSVM_ASSERT(!A.empty(), "A matrix may not be empty!"); + PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(num_rows >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, num_rows); + PLSSVM_ASSERT(num_rows >= num_mirror_rows, "The number of mirror rows ({}) cannot be greater the the total number of rows ({})!", num_mirror_rows, num_rows); + PLSSVM_ASSERT(num_rows >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, num_rows); + + // calculate constants + const auto blocked_num_rhs = static_cast(std::ceil(static_cast(num_rhs) / 
INTERNAL_BLOCK_SIZE)); + const auto blocked_num_mirror_rows = static_cast(std::ceil(static_cast(num_mirror_rows) / INTERNAL_BLOCK_SIZE)); + + // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_uz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); + +#pragma omp parallel for collapse(2) + for (std::size_t rhs = 0; rhs < blocked_num_rhs; rhs += THREAD_BLOCK_SIZE_uz) { + for (std::size_t row = 0; row < blocked_num_mirror_rows; row += THREAD_BLOCK_SIZE_uz) { + // perform operations on the current block + for (std::size_t rhs_block = 0; rhs_block < THREAD_BLOCK_SIZE_uz; ++rhs_block) { + for (std::size_t row_block = 0; row_block < THREAD_BLOCK_SIZE_uz; ++row_block) { + // calculate the indices used in the current thread + const std::size_t rhs_idx = (rhs + rhs_block) * INTERNAL_BLOCK_SIZE_uz; + const std::size_t row_idx = (row + row_block) * INTERNAL_BLOCK_SIZE_uz; + + // create a thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for (std::size_t dim = 0; dim < device_specific_num_rows; ++dim) { + // perform the dot product calculation + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t global_row = row_idx + static_cast(internal_j); + + const real_type A_val = A[dim * (num_rows - row_offset + PADDING_SIZE_uz) - (dim - std::size_t{ 1 }) * dim / std::size_t{ 2 } + device_specific_num_rows - dim + global_row]; + temp[internal_i][internal_j] += A_val * B(global_rhs, row_offset + dim); + } + } + } + + // apply the (partial) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) 
{ + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t partial_global_row = row_idx + static_cast(internal_j); + const std::size_t global_row = row_offset + device_specific_num_rows + row_idx + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_rhs < num_rhs && partial_global_row < num_mirror_rows) { + C(global_rhs, global_row) = alpha * temp[internal_i][internal_j] + beta * C(global_rhs, global_row); } } } diff --git a/include/plssvm/backends/OpenMP/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/OpenMP/kernel/cg_explicit/kernel_matrix_assembly.hpp index 64860d848..9403b12a1 100644 --- a/include/plssvm/backends/OpenMP/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/OpenMP/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -30,23 +30,28 @@ namespace plssvm::openmp::detail { * @brief Assemble the kernel matrix using the @p kernel function. * @tparam kernel the compile-time kernel function to use * @tparam Args the types of the potential additional arguments for the @p kernel function - * @param[in] q the `q` vector * @param[out] kernel_matrix the resulting kernel matrix * @param[in] data the data matrix + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for + * @param[in] q the `q` vector * @param[in] QA_cost he bottom right matrix entry multiplied by cost * @param[in] cost 1 / the cost parameter in the C-SVM * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function */ template -void device_kernel_assembly(const std::vector &q, std::vector &kernel_matrix, const soa_matrix &data, const real_type QA_cost, const real_type cost, Args... 
kernel_function_parameter) { +void device_kernel_assembly(std::vector &kernel_matrix, const soa_matrix &data, const std::size_t device_specific_num_rows, const std::size_t row_offset, const std::vector &q, const real_type QA_cost, const real_type cost, Args... kernel_function_parameter) { PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1); - PLSSVM_ASSERT(kernel_matrix.size() == (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2, "Sizes mismatch (SYMM)!: {} != {}", kernel_matrix.size(), (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2); + PLSSVM_ASSERT(!kernel_matrix.empty(), "A matrix may not be empty!"); + PLSSVM_ASSERT(q.size() >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, q.size()); + PLSSVM_ASSERT(q.size() >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, q.size()); PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!"); // calculate constants - const std::size_t dept = q.size(); - const auto blocked_dept = static_cast(std::ceil(static_cast(dept) / INTERNAL_BLOCK_SIZE)); + const std::size_t num_rows = data.num_rows() - 1; const std::size_t num_features = data.num_cols(); + const auto blocked_row_range = static_cast(std::ceil(static_cast(num_rows - row_offset) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); @@ -54,8 +59,8 @@ void device_kernel_assembly(const std::vector &q, std::vector(PADDING_SIZE); #pragma omp parallel for collapse(2) schedule(dynamic) - for (std::size_t row = 0; row < blocked_dept; row += THREAD_BLOCK_SIZE_uz) 
{ - for (std::size_t col = 0; col < blocked_dept; col += THREAD_BLOCK_SIZE_uz) { + for (std::size_t row = 0; row < blocked_row_range; row += THREAD_BLOCK_SIZE_uz) { + for (std::size_t col = 0; col < blocked_device_specific_num_rows; col += THREAD_BLOCK_SIZE_uz) { // perform operations on the current block for (std::size_t row_block = 0; row_block < THREAD_BLOCK_SIZE_uz; ++row_block) { for (std::size_t col_block = 0; col_block < THREAD_BLOCK_SIZE_uz; ++col_block) { @@ -73,8 +78,8 @@ void device_kernel_assembly(const std::vector &q, std::vector(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); temp[internal_row][internal_col] += detail::feature_reduce(data(global_row, dim), data(global_col, dim)); } @@ -85,11 +90,13 @@ void device_kernel_assembly(const std::vector &q, std::vector(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); + const std::size_t device_global_row = row_idx + static_cast(internal_row); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t device_global_col = col_idx + static_cast(internal_col); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) - if (global_row < dept && global_col < dept && global_row >= global_col) { + if (device_global_row < (num_rows - row_offset) && device_global_col < device_specific_num_rows && global_row >= global_col) { real_type temp_ij = temp[internal_row][internal_col]; temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q[global_row] - q[global_col]; // apply the cost on the diagonal @@ -97,7 +104,7 @@ void device_kernel_assembly(const std::vector &q, std::vector -inline void device_kernel_assembly_symm(const real_type alpha, const std::vector &q, const soa_matrix &data, const real_type QA_cost, const real_type cost, const soa_matrix &B, const real_type beta, soa_matrix &C, Args... kernel_function_parameter) { +inline void device_kernel_assembly_symm(const real_type alpha, const std::vector &q, const soa_matrix &data, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type QA_cost, const real_type cost, const soa_matrix &B, soa_matrix &C, Args... kernel_function_parameter) { PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1); + PLSSVM_ASSERT(q.size() >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, q.size()); + PLSSVM_ASSERT(q.size() >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, q.size()); PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!"); PLSSVM_ASSERT(B.shape() == C.shape(), "The matrices B and C must have the same shape!"); PLSSVM_ASSERT(B.num_cols() == q.size(), "The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), q.size()); - using namespace operators; - - // alpha * A * B + beta * C - C *= beta; - // calculate constants - const std::size_t dept = q.size(); - const auto blocked_dept = static_cast(std::ceil(static_cast(dept) / INTERNAL_BLOCK_SIZE)); + const std::size_t num_rows = data.num_rows() - 1; const std::size_t num_features = data.num_cols(); const std::size_t num_classes = B.num_rows(); + const auto blocked_row_range = static_cast(std::ceil(static_cast(num_rows - row_offset) / INTERNAL_BLOCK_SIZE)); + const auto 
blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_uz = static_cast(THREAD_BLOCK_SIZE); #pragma omp parallel for collapse(2) schedule(dynamic) - for (std::size_t row = 0; row < blocked_dept; row += THREAD_BLOCK_SIZE_uz) { - for (std::size_t col = 0; col < blocked_dept; col += THREAD_BLOCK_SIZE_uz) { + for (std::size_t row = 0; row < blocked_row_range; row += THREAD_BLOCK_SIZE_uz) { + for (std::size_t col = 0; col < blocked_device_specific_num_rows; col += THREAD_BLOCK_SIZE_uz) { // perform operations on the current block for (std::size_t row_block = 0; row_block < THREAD_BLOCK_SIZE_uz; ++row_block) { for (std::size_t col_block = 0; col_block < THREAD_BLOCK_SIZE_uz; ++col_block) { @@ -81,8 +79,8 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector for (std::size_t dim = 0; dim < num_features; ++dim) { for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); temp[internal_row][internal_col] += detail::feature_reduce(data(global_row, dim), data(global_col, dim)); } @@ -92,11 +90,13 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector // apply the remaining part of the kernel function and store the value in the output kernel matrix for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { for (unsigned internal_col = 
0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); + const std::size_t device_global_row = row_idx + static_cast(internal_row); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t device_global_col = col_idx + static_cast(internal_col); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) - if (global_row < dept && global_col < dept && global_row >= global_col) { + if (device_global_row < (num_rows - row_offset) && device_global_col < device_specific_num_rows && global_row >= global_col) { real_type temp_ij = temp[internal_row][internal_col]; temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col]; // apply the cost on the diagonal diff --git a/include/plssvm/backends/OpenMP/kernel/predict_kernel.hpp b/include/plssvm/backends/OpenMP/kernel/predict_kernel.hpp index ceff4190e..407096055 100644 --- a/include/plssvm/backends/OpenMP/kernel/predict_kernel.hpp +++ b/include/plssvm/backends/OpenMP/kernel/predict_kernel.hpp @@ -31,23 +31,26 @@ namespace plssvm::openmp::detail { * @param[out] w the vector to speedup the linear prediction * @param[in] alpha the previously learned weights * @param[in] support_vectors the support vectors + * @param[in] device_specific_num_sv the number of support vectors the current device is responsible for + * @param[in] sv_offset the first row in @p support_vectors the current device is responsible for */ -inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &alpha, const soa_matrix &support_vectors) { +inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &alpha, const soa_matrix &support_vectors, const std::size_t 
device_specific_num_sv, const std::size_t sv_offset) { PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows()); PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() })); + PLSSVM_ASSERT(support_vectors.num_rows() >= device_specific_num_sv, "The number of place specific sv ({}) cannot be greater the the total number of sv ({})!", device_specific_num_sv, support_vectors.num_rows()); + PLSSVM_ASSERT(support_vectors.num_rows() >= sv_offset, "The sv offset ({}) cannot be greater the the total number of sv ({})!", sv_offset, support_vectors.num_rows()); // calculate constants const std::size_t num_classes = alpha.num_rows(); - const std::size_t num_support_vectors = support_vectors.num_rows(); const std::size_t num_features = support_vectors.num_cols(); -#pragma omp parallel for collapse(2) default(none) shared(w, support_vectors, alpha) firstprivate(num_classes, num_features, num_support_vectors) +#pragma omp parallel for collapse(2) default(none) shared(w, support_vectors, alpha) firstprivate(num_classes, num_features, device_specific_num_sv, sv_offset) for (std::size_t a = 0; a < num_classes; ++a) { for (std::size_t dim = 0; dim < num_features; ++dim) { real_type temp{ 0.0 }; #pragma omp simd reduction(+ : temp) - for (std::size_t idx = 0; idx < num_support_vectors; ++idx) { - temp = std::fma(alpha(a, idx), support_vectors(idx, dim), temp); + for (std::size_t idx = 0; idx < device_specific_num_sv; ++idx) { + temp = std::fma(alpha(a, sv_offset + idx), support_vectors(sv_offset + idx, dim), temp); } w(a, dim) = temp; } @@ -60,26 +63,29 @@ inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &prediction, const soa_matrix &w, const std::vector &rho, const soa_matrix &predict_points) { +inline void device_kernel_predict_linear(aos_matrix 
&prediction, const soa_matrix &w, const std::vector &rho, const soa_matrix &predict_points, const std::size_t device_specific_num_predict_points, const std::size_t row_offset) { PLSSVM_ASSERT(w.num_rows() == rho.size(), "Size mismatch: {} vs {}!", w.num_rows(), rho.size()); PLSSVM_ASSERT(w.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", w.num_cols(), predict_points.num_cols()); PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() })); + PLSSVM_ASSERT(predict_points.num_rows() >= device_specific_num_predict_points, "The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", device_specific_num_predict_points, predict_points.num_rows()); + PLSSVM_ASSERT(predict_points.num_rows() >= row_offset, "The row offset ({}) cannot be greater the the total number of predict points ({})!", row_offset, predict_points.num_rows()); // calculate constants const std::size_t num_classes = prediction.num_cols(); - const std::size_t num_predict_points = predict_points.num_rows(); const std::size_t num_features = predict_points.num_cols(); -#pragma omp parallel for collapse(2) default(none) shared(prediction, w, rho, predict_points) firstprivate(num_classes, num_features, num_predict_points) - for (std::size_t point_index = 0; point_index < num_predict_points; ++point_index) { +#pragma omp parallel for collapse(2) default(none) shared(prediction, w, rho, predict_points) firstprivate(num_classes, num_features, device_specific_num_predict_points, row_offset) + for (std::size_t point_index = 0; point_index < device_specific_num_predict_points; ++point_index) { for (std::size_t a = 0; a < num_classes; ++a) { real_type temp{ 0.0 }; #pragma omp simd reduction(+ : temp) for (std::size_t dim = 0; dim < num_features; ++dim) { - temp = std::fma(w(a, dim), 
predict_points(point_index, dim), temp); + temp = std::fma(w(a, dim), predict_points(row_offset + point_index, dim), temp); } - prediction(point_index, a) = temp - rho[a]; + prediction(row_offset + point_index, a) = temp - rho[a]; } } } @@ -93,21 +99,24 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons * @param[in] rho the previously learned bias * @param[in] support_vectors the support vectors * @param[in] predict_points the data points to predict + * @param[in] device_specific_num_predict_points the number of predict points the current device is responsible for + * @param[in] row_offset the first row in @p predict_points the current device is responsible for * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function */ template -inline void device_kernel_predict(aos_matrix &prediction, const aos_matrix &alpha, const std::vector &rho, const soa_matrix &support_vectors, const soa_matrix &predict_points, Args... kernel_function_parameter) { +inline void device_kernel_predict(aos_matrix &prediction, const aos_matrix &alpha, const std::vector &rho, const soa_matrix &support_vectors, const soa_matrix &predict_points, const std::size_t device_specific_num_predict_points, const std::size_t row_offset, Args... 
kernel_function_parameter) { PLSSVM_ASSERT(alpha.num_rows() == rho.size(), "Size mismatch: {} vs {}!", alpha.num_rows(), rho.size()); PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows()); PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", support_vectors.num_cols(), predict_points.num_cols()); PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() })); + PLSSVM_ASSERT(predict_points.num_rows() >= device_specific_num_predict_points, "The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", device_specific_num_predict_points, predict_points.num_rows()); + PLSSVM_ASSERT(predict_points.num_rows() >= row_offset, "The row offset ({}) cannot be greater the the total number of predict points ({})!", row_offset, predict_points.num_rows()); // calculate constants const std::size_t num_classes = alpha.num_rows(); const std::size_t num_support_vectors = support_vectors.num_rows(); const auto blocked_num_support_vectors = static_cast(std::ceil(static_cast(num_support_vectors) / INTERNAL_BLOCK_SIZE)); - const std::size_t num_predict_points = predict_points.num_rows(); - const auto blocked_num_predict_points = static_cast(std::ceil(static_cast(num_predict_points) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_predict_points = static_cast(std::ceil(static_cast(device_specific_num_predict_points) / INTERNAL_BLOCK_SIZE)); const std::size_t num_features = predict_points.num_cols(); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows @@ -115,14 +124,14 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m const auto THREAD_BLOCK_SIZE_uz = static_cast(THREAD_BLOCK_SIZE); #pragma 
omp parallel for collapse(2) - for (std::size_t point_index = 0; point_index < num_predict_points; ++point_index) { + for (std::size_t point_index = 0; point_index < device_specific_num_predict_points; ++point_index) { for (std::size_t a = 0; a < num_classes; ++a) { - prediction(point_index, a) -= rho[a]; + prediction(row_offset + point_index, a) -= rho[a]; } } #pragma omp parallel for collapse(2) - for (std::size_t pp = 0; pp < blocked_num_predict_points; pp += THREAD_BLOCK_SIZE_uz) { + for (std::size_t pp = 0; pp < blocked_device_specific_num_predict_points; pp += THREAD_BLOCK_SIZE_uz) { for (std::size_t sv = 0; sv < blocked_num_support_vectors; sv += THREAD_BLOCK_SIZE_uz) { // perform operations on the current block for (std::size_t pp_block = 0; pp_block < THREAD_BLOCK_SIZE_uz; ++pp_block) { @@ -139,7 +148,7 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m // perform the feature reduction calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_sv_idx = sv_idx + static_cast(internal_sv); temp[internal_pp][internal_sv] += detail::feature_reduce(support_vectors(global_sv_idx, dim), predict_points(global_pp_idx, dim)); @@ -158,11 +167,12 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m for (std::size_t a = 0; a < num_classes; ++a) { for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t device_global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + 
static_cast(internal_pp); const std::size_t global_sv_idx = sv_idx + static_cast(internal_sv); // be sure to not perform out of bounds accesses - if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) { + if (device_global_pp_idx < device_specific_num_predict_points && global_sv_idx < num_support_vectors) { #pragma omp atomic prediction(global_pp_idx, a) += alpha(a, global_sv_idx) * temp[internal_pp][internal_sv]; } diff --git a/include/plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp b/include/plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp index 088a4a875..4fd639732 100644 --- a/include/plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp +++ b/include/plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp @@ -23,6 +23,7 @@ #include "plssvm/detail/igor_utility.hpp" // plssvm::detail::get_value_from_named_parameter #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::{has_only_sycl_parameter_named_args_v, has_only_sycl_named_args_v} #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::detail::csvm_backend_exists @@ -189,19 +190,44 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + /** + * @brief Construct a new C-SVC using the AdaptiveCpp backend with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + + /** + * @brief Construct a new C-SVC using the AdaptiveCpp backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::adaptivecpp::csvm(target, std::forward(named_sycl_args)...) { } + /** * @brief Construct a new C-SVC using the AdaptiveCpp backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @param[in] named_sycl_args the additional optional SYCL specific named arguments * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvc(const target_platform target, const parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::adaptivecpp::csvm(target, std::forward(named_sycl_args)...) { } /** @@ -211,7 +237,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVC using the AdaptiveCpp backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_args)...) { } /** @@ -222,7 +259,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::adaptivecpp::csvm(target, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVC using the AdaptiveCpp backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::adaptivecpp::csvm(target, std::forward(named_args)...) { } }; @@ -240,20 +289,45 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvr(parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + explicit csvr(const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + /** + * @brief Construct a new C-SVR using the AdaptiveCpp backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + + /** + * @brief Construct a new C-SVR using the AdaptiveCpp backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::adaptivecpp::csvm(target, std::forward(named_sycl_args)...) { } + /** * @brief Construct a new C-SVR using the AdaptiveCpp backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @param[in] named_sycl_args the additional optional SYCL specific named arguments * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvr(target_platform target, parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::adaptivecpp::csvm(target, std::forward(named_sycl_args)...) { } /** @@ -263,7 +337,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVR using the AdaptiveCpp backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::adaptivecpp::csvm(target_platform::automatic, std::forward(named_args)...) { } /** @@ -274,7 +359,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::adaptivecpp::csvm(target, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVR using the AdaptiveCpp backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::adaptivecpp::csvm(target, std::forward(named_args)...) 
{ } }; diff --git a/include/plssvm/backends/SYCL/DPCPP/csvm.hpp b/include/plssvm/backends/SYCL/DPCPP/csvm.hpp index 0af8b5a83..4bcdc2da9 100644 --- a/include/plssvm/backends/SYCL/DPCPP/csvm.hpp +++ b/include/plssvm/backends/SYCL/DPCPP/csvm.hpp @@ -23,6 +23,7 @@ #include "plssvm/detail/igor_utility.hpp" // plssvm::detail::get_value_from_named_parameter #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::{has_only_sycl_parameter_named_args_v, has_only_sycl_named_args_v} #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/svm/csvm.hpp" // plssvm::detail::csvm_backend_exists @@ -189,19 +190,44 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + /** + * @brief Construct a new C-SVC using the DPC++ backend with the parameters given through @p params. + * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + + /** + * @brief Construct a new C-SVC using the DPC++ backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::dpcpp::csvm(target, std::forward(named_sycl_args)...) { } + /** * @brief Construct a new C-SVC using the DPC++ backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @param[in] named_sycl_args the additional optional SYCL specific named arguments * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvc(const target_platform target, const parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::dpcpp::csvm(target, std::forward(named_sycl_args)...) { } /** @@ -211,7 +237,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVC using the DPC++ backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_args)...) { } /** @@ -222,7 +259,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::dpcpp::csvm(target, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVC using the DPC++ backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::dpcpp::csvm(target, std::forward(named_args)...) { } }; @@ -240,20 +289,45 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvr(parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + explicit csvr(const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + /** + * @brief Construct a new C-SVR using the DPC++ backend with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_sycl_args)...) { } + + /** + * @brief Construct a new C-SVR using the DPC++ backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @param[in] named_sycl_args the additional optional SYCL specific named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::dpcpp::csvm(target, std::forward(named_sycl_args)...) { } + /** * @brief Construct a new C-SVR using the DPC++ backend on the @p target platform with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @param[in] named_sycl_args the additional optional SYCL specific named arguments * @throws plssvm::exception all exceptions thrown in the base class constructors */ template )> - explicit csvr(target_platform target, parameter params, Args &&...named_sycl_args) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params, Args &&...named_sycl_args) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::dpcpp::csvm(target, std::forward(named_sycl_args)...) { } /** @@ -263,7 +337,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVR using the DPC++ backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::dpcpp::csvm(target_platform::automatic, std::forward(named_args)...) { } /** @@ -274,7 +359,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ named_args... }, + ::plssvm::csvm{ mpi::communicator{}, named_args... }, + ::plssvm::dpcpp::csvm(target, std::forward(named_args)...) { } + + /** + * @brief Construct a new C-SVR using the DPC++ backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), named_args... }, ::plssvm::dpcpp::csvm(target, std::forward(named_args)...) { } }; diff --git a/include/plssvm/backends/gpu_csvm.hpp b/include/plssvm/backends/gpu_csvm.hpp index aee660731..512fb521c 100644 --- a/include/plssvm/backends/gpu_csvm.hpp +++ b/include/plssvm/backends/gpu_csvm.hpp @@ -54,19 +54,10 @@ class gpu_csvm : virtual public ::plssvm::csvm { using pinned_memory_type = pinned_memory_t; /** - * @copydoc plssvm::csvm::csvm() + * @brief Default constructor. + * @details Needed due to multiple-inheritance. */ - explicit gpu_csvm(parameter params = {}) : - ::plssvm::csvm{ params } { } - - /** - * @brief Construct a C-SVM forwarding all parameters @p args to the plssvm::parameter constructor. - * @tparam Args the type of the (named-)parameters - * @param[in] args the parameters used to construct a plssvm::parameter - */ - template - explicit gpu_csvm(Args &&...args) : - ::plssvm::csvm{ std::forward(args)... 
} { } + gpu_csvm() = default; /** * @copydoc plssvm::csvm::csvm(const plssvm::csvm &) @@ -245,7 +236,7 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm(A.num_rows() - 1, num_devices); + data_distribution_ = std::make_unique(comm_, num_rows_reduced, num_devices); // the final kernel matrix; multiple parts in case of multi-device execution std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(num_devices); @@ -367,7 +358,7 @@ void gpu_csvm::blas_level_3(const solver // copy data to the device B_d[device_id].copy_to_device(B); - if (device_id == 0) { + if (device_id == 0 && comm_.is_main_rank()) { // device 0 always touches all values in C -> it is sufficient that only device 0 gets the actual C matrix C_d[device_id].copy_to_device(C); // we do not perform the beta scale in C in the cg_implicit device kernel @@ -527,7 +518,7 @@ aos_matrix gpu_csvm::predict_ } // update the data distribution to account for the support vectors - data_distribution_ = std::make_unique(num_support_vectors, num_devices); + data_distribution_ = std::make_unique(comm_, num_support_vectors, num_devices); std::vector sv_d(num_devices); // split memory allocation and memory copy! 
@@ -594,6 +585,9 @@ aos_matrix gpu_csvm::predict_ w = soa_matrix{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE } }; w_d[0].copy_to_host(w); w.restore_padding(); + + // reduce w on all MPI ranks + comm_.allreduce_inplace(w); } // upload the w vector to all devices @@ -628,7 +622,7 @@ aos_matrix gpu_csvm::predict_ } // update the data distribution to account for the predict points - data_distribution_ = std::make_unique(num_predict_points, num_devices); + data_distribution_ = std::make_unique(comm_, num_predict_points, num_devices); // the predict points; partial stored on each device std::vector predict_points_d(num_devices); diff --git a/include/plssvm/backends/stdpar/csvm.hpp b/include/plssvm/backends/stdpar/csvm.hpp index f98122545..109a436e8 100644 --- a/include/plssvm/backends/stdpar/csvm.hpp +++ b/include/plssvm/backends/stdpar/csvm.hpp @@ -19,6 +19,7 @@ #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::is_one_type_of #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/svm/csvc.hpp" // plssvm::csvc @@ -139,17 +140,38 @@ class csvc : public ::plssvm::csvc, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvc(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::stdpar::csvm{} { } + /** + * @brief Construct a new C-SVC using the stdpar backend with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::stdpar::csvm{} { } + + /** + * @brief Construct a new C-SVC using the stdpar backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVC + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvc(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::stdpar::csvm{ target } { } + /** * @brief Construct a new C-SVC using the stdpar backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVC * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvc(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvc(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::stdpar::csvm{ target } { } /** @@ -159,7 +181,18 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::stdpar::csvm{} { } + + /** + * @brief Construct a new C-SVC using the stdpar backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvc(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::stdpar::csvm{} { } /** @@ -170,7 +203,19 @@ class csvc : public ::plssvm::csvc, */ template )> explicit csvc(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::stdpar::csvm{ target } { } + + /** + * @brief Construct a new C-SVC using the stdpar backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVC + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvc(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::stdpar::csvm{ target } { } }; @@ -187,17 +232,38 @@ class csvr : public ::plssvm::csvr, * @throws plssvm::exception all exceptions thrown in the base class constructors */ explicit csvr(const parameter params) : - ::plssvm::csvm{ params }, + ::plssvm::csvm{ mpi::communicator{}, params }, ::plssvm::stdpar::csvm{} { } + /** + * @brief Construct a new C-SVR using the stdpar backend with the parameters given through @p params. 
+ * @param[in] comm the used MPI communicator + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(mpi::communicator comm, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, + ::plssvm::stdpar::csvm{} { } + + /** + * @brief Construct a new C-SVR using the stdpar backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVR + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + csvr(const target_platform target, const parameter params) : + ::plssvm::csvm{ mpi::communicator{}, params }, + ::plssvm::stdpar::csvm{ target } { } + /** * @brief Construct a new C-SVR using the stdpar backend on the @p target platform with the parameters given through @p params. + * @param[in] comm the used MPI communicator * @param[in] target the target platform used for this C-SVR * @param[in] params struct encapsulating all possible SVM parameters * @throws plssvm::exception all exceptions thrown in the base class constructors */ - explicit csvr(const target_platform target, const parameter params) : - ::plssvm::csvm{ params }, + csvr(mpi::communicator comm, const target_platform target, const parameter params) : + ::plssvm::csvm{ std::move(comm), params }, ::plssvm::stdpar::csvm{ target } { } /** @@ -207,7 +273,18 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::stdpar::csvm{} { } + + /** + * @brief Construct a new C-SVR using the stdpar backend and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + explicit csvr(mpi::communicator comm, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::stdpar::csvm{} { } /** @@ -218,7 +295,19 @@ class csvr : public ::plssvm::csvr, */ template )> explicit csvr(const target_platform target, Args &&...named_args) : - ::plssvm::csvm{ std::forward(named_args)... }, + ::plssvm::csvm{ mpi::communicator{}, std::forward(named_args)... }, + ::plssvm::stdpar::csvm{ target } { } + + /** + * @brief Construct a new C-SVR using the stdpar backend on the @p target platform and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator + * @param[in] target the target platform used for this C-SVR + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructors + */ + template )> + csvr(mpi::communicator comm, const target_platform target, Args &&...named_args) : + ::plssvm::csvm{ std::move(comm), std::forward(named_args)... }, ::plssvm::stdpar::csvm{ target } { } }; diff --git a/include/plssvm/backends/stdpar/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/stdpar/kernel/cg_explicit/blas.hpp index c69f9b85b..63e9f9831 100644 --- a/include/plssvm/backends/stdpar/kernel/cg_explicit/blas.hpp +++ b/include/plssvm/backends/stdpar/kernel/cg_explicit/blas.hpp @@ -32,33 +32,37 @@ namespace plssvm::stdpar::detail { * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a symmetric matrix (memory optimized), @p B and @p C are matrices, and @p alpha and @p beta are scalars. 
* @param[in] num_rows the number of rows in @p A and @p C * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for * @param[in] alpha the scalar alpha value * @param[in] A the matrix @p A * @param[in] B the matrix @p B * @param[in] beta the scalar beta value * @param[in,out] C the matrix @p C, also used as result matrix */ -inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { - PLSSVM_ASSERT(A.size() == (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2, "A matrix sizes mismatch!: {} != {}", A.size(), (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2); +inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { + PLSSVM_ASSERT(!A.empty(), "A matrix may not be empty!"); PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows); PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(num_rows >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, num_rows); + PLSSVM_ASSERT(num_rows >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, num_rows); // calculate constants const auto blocked_num_rhs = static_cast(std::ceil(static_cast(num_rhs) / INTERNAL_BLOCK_SIZE)); - const auto blocked_num_rows = 
static_cast(std::ceil(static_cast(num_rows) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over which should be iterated - std::vector range(blocked_num_rhs * blocked_num_rows); + std::vector range(blocked_num_rhs * blocked_device_specific_num_rows); std::iota(range.begin(), range.end(), 0); std::for_each(std::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::size_t idx) { // calculate the indices used in the current thread - const std::size_t rhs = idx / blocked_num_rows; - const std::size_t row = idx % blocked_num_rows; + const std::size_t rhs = idx / blocked_device_specific_num_rows; + const std::size_t row = idx % blocked_device_specific_num_rows; const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz; const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; @@ -67,21 +71,21 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num std::array, INTERNAL_BLOCK_SIZE> temp{}; // iterate over all features - for (std::size_t dim = 0; dim < num_rows; ++dim) { + for (std::size_t dim = 0; dim < (num_rows - row_offset); ++dim) { // perform the dot product calculation for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { - const std::size_t global_i = rhs_idx + static_cast(internal_i); - const std::size_t global_j = row_idx + static_cast(internal_j); + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t global_row = row_idx + static_cast(internal_j); real_type A_val = 0.0; // determine on which side of the 
diagonal we are located - if (dim < global_j) { - A_val = A_ptr[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }]; + if (dim < global_row) { + A_val = A_ptr[dim * (num_rows - row_offset + PADDING_SIZE_uz) + global_row - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }]; } else { - A_val = A_ptr[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + A_val = A_ptr[global_row * (num_rows - row_offset + PADDING_SIZE_uz) + dim - global_row * (global_row + std::size_t{ 1 }) / std::size_t{ 2 }]; } - temp[internal_i][internal_j] += A_val * B_ptr[dim * (num_rhs + PADDING_SIZE_uz) + global_i]; + temp[internal_i][internal_j] += A_val * B_ptr[(dim + row_offset) * (num_rhs + PADDING_SIZE_uz) + global_rhs]; } } } @@ -89,12 +93,88 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num // apply the (partial) BLAS operation and update C for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { - const std::size_t global_i = rhs_idx + static_cast(internal_i); - const std::size_t global_j = row_idx + static_cast(internal_j); + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t device_global_row = row_idx + static_cast(internal_j); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_j); // be sure to not perform out of bounds accesses - if (global_i < num_rhs && global_j < num_rows) { - C_ptr[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C_ptr[global_j * (num_rhs + PADDING_SIZE_uz) + global_i]; + if (global_rhs < num_rhs && device_global_row < device_specific_num_rows) { + C_ptr[global_row * (num_rhs + PADDING_SIZE_uz) + global_rhs] = alpha * temp[internal_i][internal_j] + beta * C_ptr[global_row * (num_rhs + PADDING_SIZE_uz) + global_rhs]; 
+ } + } + } + }); +} + +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. + * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] num_mirror_rows the number of rows to mirror down + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; thr rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + */ +inline void device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, const std::vector &A, const soa_matrix &B, const real_type beta, soa_matrix &C) { + // compute: C = alpha * A * B + beta * C with A in m x k, B in n x k, and C in n x m, alpha, beta as scalar + PLSSVM_ASSERT(!A.empty(), "A matrix may not be empty!"); + PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows); + PLSSVM_ASSERT(num_rows >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, num_rows); + PLSSVM_ASSERT(num_rows >= num_mirror_rows, "The number of mirror rows ({}) cannot be greater the the total number of rows ({})!", 
num_mirror_rows, num_rows); + PLSSVM_ASSERT(num_rows >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, num_rows); + + // calculate constants + const auto blocked_num_rhs = static_cast(std::ceil(static_cast(num_rhs) / INTERNAL_BLOCK_SIZE)); + const auto blocked_num_mirror_rows = static_cast(std::ceil(static_cast(num_mirror_rows) / INTERNAL_BLOCK_SIZE)); + + // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); + const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); + + // define range over which should be iterated + std::vector range(blocked_num_rhs * blocked_num_mirror_rows); // define range over which should be iterated + std::iota(range.begin(), range.end(), 0); + + std::for_each(std::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::size_t idx) { + // calculate the indices used in the current thread + const std::size_t rhs = idx / blocked_num_mirror_rows; + const std::size_t row = idx % blocked_num_mirror_rows; + + const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz; + const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; + + // create a thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for (std::size_t dim = 0; dim < device_specific_num_rows; ++dim) { + // perform the dot product calculation + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t global_row = row_idx + static_cast(internal_j); + + const real_type A_val = A_ptr[dim * (num_rows - row_offset + PADDING_SIZE_uz) - (dim - std::size_t{ 1 }) * dim / std::size_t{ 2 } + device_specific_num_rows - dim + 
global_row]; + temp[internal_i][internal_j] += A_val * B_ptr[(dim + row_offset) * (num_rhs + PADDING_SIZE_uz) + global_rhs]; + } + } + } + + // apply the (partial) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const std::size_t global_rhs = rhs_idx + static_cast(internal_i); + const std::size_t partial_global_row = row_idx + static_cast(internal_j); + const std::size_t global_row = row_offset + device_specific_num_rows + row_idx + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_rhs < num_rhs && partial_global_row < num_mirror_rows) { + C_ptr[global_row * (num_rhs + PADDING_SIZE_uz) + global_rhs] = alpha * temp[internal_i][internal_j] + beta * C_ptr[global_row * (num_rhs + PADDING_SIZE_uz) + global_rhs]; } } } diff --git a/include/plssvm/backends/stdpar/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/stdpar/kernel/cg_explicit/kernel_matrix_assembly.hpp index 3e67cf144..93772aab3 100644 --- a/include/plssvm/backends/stdpar/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/stdpar/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -33,72 +33,80 @@ namespace plssvm::stdpar::detail { * @brief Assemble the kernel matrix using the @p kernel function. 
* @tparam kernel the compile-time kernel function to use * @tparam Args the types of the potential additional arguments for the @p kernel function - * @param[in] q the `q` vector * @param[out] kernel_matrix the resulting kernel matrix * @param[in] data the data matrix + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for + * @param[in] q the `q` vector * @param[in] QA_cost he bottom right matrix entry multiplied by cost * @param[in] cost 1 / the cost parameter in the C-SVM * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function */ template -void device_kernel_assembly(const std::vector &q, std::vector &kernel_matrix, const soa_matrix &data, const real_type QA_cost, const real_type cost, Args... kernel_function_parameter) { +void device_kernel_assembly(std::vector &kernel_matrix, const soa_matrix &data, const std::size_t device_specific_num_rows, const std::size_t row_offset, const std::vector &q, const real_type QA_cost, const real_type cost, Args... 
kernel_function_parameter) { PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1); - PLSSVM_ASSERT(kernel_matrix.size() == (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2, "Sizes mismatch (SYMM)!: {} != {}", kernel_matrix.size(), (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2); + PLSSVM_ASSERT(!kernel_matrix.empty(), "A matrix may not be empty!"); + PLSSVM_ASSERT(q.size() >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, q.size()); + PLSSVM_ASSERT(q.size() >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, q.size()); PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!"); - const std::size_t dept = q.size(); - const auto blocked_dept = static_cast(std::ceil(static_cast(dept) / INTERNAL_BLOCK_SIZE)); + // calculate constants + const std::size_t num_rows = data.num_rows() - 1; const std::size_t num_features = data.num_cols(); + const auto blocked_row_range = static_cast(std::ceil(static_cast(num_rows - row_offset) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); - // define range over which should be iterated - std::vector range(blocked_dept * (blocked_dept + 1) / 2); - std::iota(range.begin(), range.end(), 0); + // count the number of entries in the final index list + std::vector indices(blocked_row_range * blocked_device_specific_num_rows); // define range over which should be iterated + std::iota(indices.begin(), indices.end(), 0); - std::for_each(std::execution::par_unseq, 
range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::size_t idx) { + std::for_each(std::execution::par_unseq, indices.begin(), indices.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::size_t idx) { // calculate the indices used in the current thread - const std::size_t col = static_cast(static_cast(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1)); - const std::size_t row = static_cast(0.5 * static_cast(2 * (idx - col * blocked_dept) + col * col + col)); - - const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; - const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz; - - // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs - // create a thread private array used for internal caching - std::array, INTERNAL_BLOCK_SIZE> temp{}; - - // iterate over all features - for (std::size_t dim = 0; dim < num_features; ++dim) { - // perform the feature reduction calculation - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - temp[internal_row][internal_col] += detail::feature_reduce(data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]); + const std::size_t row_idx = (idx / blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + const std::size_t col_idx = (idx % blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + + // only calculate the upper triangular matrix + if (row_idx >= col_idx) { + // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs + // create a 
thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for (std::size_t dim = 0; dim < num_features; ++dim) { + // perform the feature reduction calculation + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + temp[internal_row][internal_col] += detail::feature_reduce(data_ptr[dim * (num_rows + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (num_rows + 1 + PADDING_SIZE_uz) + global_col]); + } } } - } - // apply the remaining part of the kernel function and store the value in the output kernel matrix - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - // calculate the indices to access the kernel matrix (the part stored on the current device) - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) - if (global_row < dept && global_col < dept && global_row >= global_col) { - real_type temp_ij = temp[internal_row][internal_col]; - temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q_ptr[global_row] - q_ptr[global_col]; - // apply the cost on the diagonal - if (global_row == global_col) { - temp_ij += cost; + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + // calculate the indices to access the kernel matrix (the part stored on the current device) + const std::size_t device_global_row = row_idx + static_cast(internal_row); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t device_global_col = col_idx + static_cast(internal_col); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if (device_global_row < (num_rows - row_offset) && device_global_col < device_specific_num_rows && global_row >= global_col) { + real_type temp_ij = temp[internal_row][internal_col]; + temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) 
+ QA_cost - q_ptr[global_row] - q_ptr[global_col]; + // apply the cost on the diagonal + if (global_row == global_col) { + temp_ij += cost; + } + kernel_matrix_ptr[device_global_col * (num_rows - row_offset + PADDING_SIZE_uz) - device_global_col * (device_global_col + std::size_t{ 1 }) / std::size_t{ 2 } + device_global_row] = temp_ij; } - kernel_matrix_ptr[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij; } } } diff --git a/include/plssvm/backends/stdpar/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/stdpar/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp index 04cdfa6a9..fdebd9cb5 100644 --- a/include/plssvm/backends/stdpar/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp +++ b/include/plssvm/backends/stdpar/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -17,7 +17,6 @@ #include "plssvm/backends/stdpar/kernel/kernel_functions.hpp" // plssvm::stdpar::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/operators.hpp" // overloaded arithmetic operations for a plssvm::matrix #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/kernel_functions.hpp" // plssvm::kernel_function #include "plssvm/matrix.hpp" // aos_matrix @@ -39,86 +38,87 @@ namespace plssvm::stdpar::detail { * @param[in] alpha the scalar alpha value * @param[in] q the `q` vector * @param[in] data the data matrix + * @param[in] device_specific_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data the current device is responsible for * @param[in] QA_cost he bottom right matrix entry multiplied by cost * @param[in] cost 1 / the cost parameter in the C-SVM * @param[in] B the matrix @p B - * @param[in] beta the beta alpha value * @param[in,out] C the matrix @p C * 
@param[in] kernel_function_parameter the potential additional arguments for the @p kernel function */ template -inline void device_kernel_assembly_symm(const real_type alpha, const std::vector &q, const soa_matrix &data, const real_type QA_cost, const real_type cost, const soa_matrix &B, const real_type beta, soa_matrix &C, Args... kernel_function_parameter) { +inline void device_kernel_assembly_symm(const real_type alpha, const std::vector &q, const soa_matrix &data, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type QA_cost, const real_type cost, const soa_matrix &B, soa_matrix &C, Args... kernel_function_parameter) { PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1); + PLSSVM_ASSERT(q.size() >= device_specific_num_rows, "The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", device_specific_num_rows, q.size()); + PLSSVM_ASSERT(q.size() >= row_offset, "The row offset ({}) cannot be greater the the total number of rows ({})!", row_offset, q.size()); PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!"); PLSSVM_ASSERT(B.shape() == C.shape(), "The matrices B and C must have the same shape!"); PLSSVM_ASSERT(B.num_cols() == q.size(), "The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), q.size()); - using namespace operators; - - // alpha * A * B + beta * C - C *= beta; - // calculate constants - const std::size_t dept = q.size(); - const auto blocked_dept = static_cast(std::ceil(static_cast(dept) / INTERNAL_BLOCK_SIZE)); + const std::size_t num_rows = data.num_rows() - 1; const std::size_t num_features = data.num_cols(); const std::size_t num_classes = B.num_rows(); + const auto blocked_row_range = static_cast(std::ceil(static_cast(num_rows - row_offset) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_rows = 
static_cast(std::ceil(static_cast(device_specific_num_rows) / INTERNAL_BLOCK_SIZE)); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_uz = static_cast(INTERNAL_BLOCK_SIZE); const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); - // define range over which should be iterated - std::vector range(blocked_dept * (blocked_dept + 1) / 2); - std::iota(range.begin(), range.end(), 0); + // count the number of entries in the final index list + std::vector indices(blocked_row_range * blocked_device_specific_num_rows); // define range over which should be iterated + std::iota(indices.begin(), indices.end(), 0); - std::for_each(std::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::size_t idx) { + std::for_each(std::execution::par_unseq, indices.begin(), indices.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::size_t idx) { // calculate the indices used in the current thread - const std::size_t col = static_cast(static_cast(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1)); - const std::size_t row = static_cast(0.5 * static_cast(2 * (idx - col * blocked_dept) + col * col + col)); - - const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz; - const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz; - - // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs - // create a thread private array used for internal caching - std::array, INTERNAL_BLOCK_SIZE> temp{}; - - // iterate over all features - for (std::size_t dim = 0; dim < num_features; ++dim) { - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + 
static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - temp[internal_row][internal_col] += detail::feature_reduce(data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]); + const std::size_t row_idx = (idx / blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + const std::size_t col_idx = (idx % blocked_device_specific_num_rows) * INTERNAL_BLOCK_SIZE_uz; + + // only calculate the upper triangular matrix + if (row_idx >= col_idx) { + // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs + // create a thread private array used for internal caching + std::array, INTERNAL_BLOCK_SIZE> temp{}; + + // iterate over all features + for (std::size_t dim = 0; dim < num_features; ++dim) { + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + temp[internal_row][internal_col] += detail::feature_reduce(data_ptr[dim * (num_rows + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (num_rows + 1 + PADDING_SIZE_uz) + global_col]); + } } } - } - // apply the remaining part of the kernel function and store the value in the output kernel matrix - for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { - for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { - const std::size_t global_row = row_idx + static_cast(internal_row); - const std::size_t global_col = col_idx + static_cast(internal_col); - - // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) - if (global_row < dept && global_col < dept && global_row >= global_col) 
{ - real_type temp_ij = temp[internal_row][internal_col]; - temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter...) + QA_cost - q_ptr[global_row] - q_ptr[global_col]; - // apply the cost on the diagonal - if (global_row == global_col) { - temp_ij += cost; - // calculate the values of alpha * A * B - for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - atomic_ref{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx]; - } - } else { - // calculate the values of alpha * A * B - for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - atomic_ref{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx]; - // symmetry - atomic_ref{ C_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx]; + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) { + for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) { + const std::size_t device_global_row = row_idx + static_cast(internal_row); + const std::size_t global_row = row_offset + row_idx + static_cast(internal_row); + const std::size_t device_global_col = col_idx + static_cast(internal_col); + const std::size_t global_col = row_offset + col_idx + static_cast(internal_col); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if (device_global_row < (num_rows - row_offset) && device_global_col < device_specific_num_rows && global_row >= global_col) { + real_type temp_ij = temp[internal_row][internal_col]; + temp_ij = detail::apply_kernel_function(temp_ij, 
kernel_function_parameter...) + QA_cost - q_ptr[global_row] - q_ptr[global_col]; + // apply the cost on the diagonal + if (global_row == global_col) { + temp_ij += cost; + // calculate the values of alpha * A * B + for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { + atomic_ref{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx]; + } + } else { + // calculate the values of alpha * A * B + for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) { + atomic_ref{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx]; + // symmetry + atomic_ref{ C_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx]; + } } } } diff --git a/include/plssvm/backends/stdpar/kernel/kernel_functions.hpp b/include/plssvm/backends/stdpar/kernel/kernel_functions.hpp index 8d86925c2..1b1a7cf52 100644 --- a/include/plssvm/backends/stdpar/kernel/kernel_functions.hpp +++ b/include/plssvm/backends/stdpar/kernel/kernel_functions.hpp @@ -16,8 +16,9 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) // TODO: remove after linker error is fixed on AMD GPUs - #include "sycl/sycl.hpp" // override std::* math functions +// to prevent major headaches on various different platforms with different SYCL compilers, ALWAYS use the SYCL math functions in stdpar kernels +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) + #include "sycl/sycl.hpp" // override std::* math functions #endif #if defined(PLSSVM_STDPAR_BACKEND_HAS_HIPSTDPAR) @@ -66,7 +67,7 @@ template <> */ template <> [[nodiscard]] inline 
PLSSVM_STDPAR_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) return ::sycl::fabs(val1 - val2); #else return std::abs(val1 - val2); @@ -117,7 +118,7 @@ template <> */ template <> [[nodiscard]] inline PLSSVM_STDPAR_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const int degree, const real_type gamma, const real_type coef0) { -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) return ::sycl::pow(gamma * value + coef0, (real_type) degree); #else return std::pow(gamma * value + coef0, (real_type) degree); @@ -132,7 +133,7 @@ template <> */ template <> [[nodiscard]] inline PLSSVM_STDPAR_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma) { -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) return ::sycl::exp(-gamma * value); #else return std::exp(-gamma * value); @@ -148,7 +149,7 @@ template <> */ template <> [[nodiscard]] inline PLSSVM_STDPAR_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma, const real_type coef0) { -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) return ::sycl::tanh(gamma * value + coef0); #else return std::tanh(gamma * value + coef0); @@ -163,7 +164,7 @@ template <> */ template <> [[nodiscard]] inline PLSSVM_STDPAR_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma) { -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) return ::sycl::exp(-gamma * 
value); #else return std::exp(-gamma * value); @@ -178,7 +179,7 @@ template <> */ template <> [[nodiscard]] inline PLSSVM_STDPAR_KERNEL_FUNCTION real_type apply_kernel_function(const real_type value, const real_type gamma) { -#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) +#if defined(PLSSVM_STDPAR_BACKEND_HAS_INTEL_LLVM) || defined(PLSSVM_STDPAR_BACKEND_HAS_ACPP) return ::sycl::exp(-gamma * value); #else return std::exp(-gamma * value); diff --git a/include/plssvm/backends/stdpar/kernel/predict_kernel.hpp b/include/plssvm/backends/stdpar/kernel/predict_kernel.hpp index 508a51b9c..ce46e6a1c 100644 --- a/include/plssvm/backends/stdpar/kernel/predict_kernel.hpp +++ b/include/plssvm/backends/stdpar/kernel/predict_kernel.hpp @@ -36,16 +36,20 @@ namespace plssvm::stdpar::detail { * @param[out] w the vector to speedup the linear prediction * @param[in] alpha the previously learned weights * @param[in] support_vectors the support vectors + * @param[in] device_specific_num_sv the number of support vectors the current device is responsible for + * @param[in] sv_offset the first row in @p support_vectors the current device is responsible for */ -inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &alpha, const soa_matrix &support_vectors) { +inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &alpha, const soa_matrix &support_vectors, const std::size_t device_specific_num_sv, const std::size_t sv_offset) { PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows()); PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() })); + PLSSVM_ASSERT(support_vectors.num_rows() >= device_specific_num_sv, "The number of place specific sv ({}) cannot be greater the the total number of sv ({})!", device_specific_num_sv, 
support_vectors.num_rows()); + PLSSVM_ASSERT(support_vectors.num_rows() >= sv_offset, "The sv offset ({}) cannot be greater the the total number of sv ({})!", sv_offset, support_vectors.num_rows()); // calculate constants - const std::size_t num_features = support_vectors.num_cols(); - const auto blocked_num_features = static_cast(std::ceil(static_cast(num_features) / INTERNAL_BLOCK_SIZE)); const std::size_t num_classes = alpha.num_rows(); const auto blocked_num_classes = static_cast(std::ceil(static_cast(num_classes) / INTERNAL_BLOCK_SIZE)); + const std::size_t num_features = support_vectors.num_cols(); + const auto blocked_num_features = static_cast(std::ceil(static_cast(num_features) / INTERNAL_BLOCK_SIZE)); const std::size_t num_support_vectors = support_vectors.num_rows(); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows @@ -53,7 +57,7 @@ inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix(PADDING_SIZE); // define range over which should be iterated - std::vector range(blocked_num_features * blocked_num_classes); + std::vector range(blocked_num_classes * blocked_num_features); std::iota(range.begin(), range.end(), 0); std::for_each(std::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::size_t idx) { @@ -68,14 +72,14 @@ inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix, INTERNAL_BLOCK_SIZE> temp{}; // iterate over all features - for (std::size_t sv = 0; sv < num_support_vectors; ++sv) { + for (std::size_t sv = 0; sv < device_specific_num_sv; ++sv) { // perform the feature reduction calculation for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) { for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { const std::size_t global_feature_idx = feature_idx + static_cast(internal_feature); const std::size_t global_class_idx = 
class_idx + static_cast(internal_class); - temp[internal_feature][internal_class] += alpha_ptr[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * sv_ptr[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv]; + temp[internal_feature][internal_class] += alpha_ptr[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv_offset + sv] * sv_ptr[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv_offset + sv]; } } } @@ -98,15 +102,19 @@ inline void device_kernel_w_linear(soa_matrix &w, const aos_matrix &prediction, const soa_matrix &w, const std::vector &rho, const soa_matrix &predict_points) { +inline void device_kernel_predict_linear(aos_matrix &prediction, const soa_matrix &w, const std::vector &rho, const soa_matrix &predict_points, const std::size_t device_specific_num_predict_points, const std::size_t row_offset) { PLSSVM_ASSERT(w.num_rows() == rho.size(), "Size mismatch: {} vs {}!", w.num_rows(), rho.size()); PLSSVM_ASSERT(w.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", w.num_cols(), predict_points.num_cols()); PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() })); + PLSSVM_ASSERT(predict_points.num_rows() >= device_specific_num_predict_points, "The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", device_specific_num_predict_points, predict_points.num_rows()); + PLSSVM_ASSERT(predict_points.num_rows() >= row_offset, "The row offset ({}) cannot be greater the the total number of predict points ({})!", row_offset, predict_points.num_rows()); // calculate constants const std::size_t num_predict_points = predict_points.num_rows(); - const auto blocked_num_predict_points = static_cast(std::ceil(static_cast(num_predict_points) / INTERNAL_BLOCK_SIZE)); + const auto 
blocked_device_specific_num_predict_points = static_cast(std::ceil(static_cast(device_specific_num_predict_points) / INTERNAL_BLOCK_SIZE)); const std::size_t num_classes = prediction.num_cols(); const auto blocked_num_classes = static_cast(std::ceil(static_cast(num_classes) / INTERNAL_BLOCK_SIZE)); const std::size_t num_features = predict_points.num_cols(); @@ -116,7 +124,7 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over which should be iterated - std::vector range(blocked_num_predict_points * blocked_num_classes); + std::vector range(blocked_device_specific_num_predict_points * blocked_num_classes); std::iota(range.begin(), range.end(), 0); std::for_each(std::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::size_t idx) { @@ -135,7 +143,7 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons // perform the feature reduction calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_class_idx = class_idx + static_cast(internal_class); temp[internal_pp][internal_class] += w_ptr[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * pp_ptr[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]; @@ -146,10 +154,11 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons // perform the dot product calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { - const std::size_t 
global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t device_global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_class_idx = class_idx + static_cast(internal_class); - if (global_pp_idx < num_predict_points && global_class_idx < num_classes) { + if (device_global_pp_idx < device_specific_num_predict_points && global_class_idx < num_classes) { prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho_ptr[global_class_idx]; } } @@ -166,21 +175,25 @@ inline void device_kernel_predict_linear(aos_matrix &prediction, cons * @param[in] rho the previously learned bias * @param[in] support_vectors the support vectors * @param[in] predict_points the data points to predict + * @param[in] device_specific_num_predict_points the number of predict points the current device is responsible for + * @param[in] row_offset the first row in @p predict_points the current device is responsible for * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function */ template -inline void device_kernel_predict(aos_matrix &prediction, const aos_matrix &alpha, const std::vector &rho, const soa_matrix &support_vectors, const soa_matrix &predict_points, Args... kernel_function_parameter) { +inline void device_kernel_predict(aos_matrix &prediction, const aos_matrix &alpha, const std::vector &rho, const soa_matrix &support_vectors, const soa_matrix &predict_points, const std::size_t device_specific_num_predict_points, const std::size_t row_offset, Args... 
kernel_function_parameter) { PLSSVM_ASSERT(alpha.num_rows() == rho.size(), "Size mismatch: {} vs {}!", alpha.num_rows(), rho.size()); PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows()); PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", support_vectors.num_cols(), predict_points.num_cols()); PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() })); + PLSSVM_ASSERT(predict_points.num_rows() >= device_specific_num_predict_points, "The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", device_specific_num_predict_points, predict_points.num_rows()); + PLSSVM_ASSERT(predict_points.num_rows() >= row_offset, "The row offset ({}) cannot be greater the the total number of predict points ({})!", row_offset, predict_points.num_rows()); // calculate constants const std::size_t num_classes = alpha.num_rows(); const std::size_t num_support_vectors = support_vectors.num_rows(); const auto blocked_num_support_vectors = static_cast(std::ceil(static_cast(num_support_vectors) / INTERNAL_BLOCK_SIZE)); const std::size_t num_predict_points = predict_points.num_rows(); - const auto blocked_num_predict_points = static_cast(std::ceil(static_cast(num_predict_points) / INTERNAL_BLOCK_SIZE)); + const auto blocked_device_specific_num_predict_points = static_cast(std::ceil(static_cast(device_specific_num_predict_points) / INTERNAL_BLOCK_SIZE)); const std::size_t num_features = predict_points.num_cols(); // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows @@ -188,7 +201,7 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m const auto PADDING_SIZE_uz = static_cast(PADDING_SIZE); // define range over 
which should be iterated - std::vector range(blocked_num_predict_points * blocked_num_support_vectors); + std::vector range(blocked_device_specific_num_predict_points * blocked_num_support_vectors); std::iota(range.begin(), range.end(), 0); std::for_each(std::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::size_t idx) { @@ -207,7 +220,7 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m // perform the feature reduction calculation for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_sv_idx = sv_idx + static_cast(internal_sv); temp[internal_pp][internal_sv] += detail::feature_reduce(sv_ptr[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx], @@ -227,11 +240,12 @@ inline void device_kernel_predict(aos_matrix &prediction, const aos_m for (std::size_t a = 0; a < num_classes; ++a) { for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) { for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { - const std::size_t global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t device_global_pp_idx = pp_idx + static_cast(internal_pp); + const std::size_t global_pp_idx = row_offset + pp_idx + static_cast(internal_pp); const std::size_t global_sv_idx = sv_idx + static_cast(internal_sv); // be sure to not perform out of bounds accesses - if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) { + if (device_global_pp_idx < device_specific_num_predict_points && global_sv_idx < num_support_vectors) { if 
(global_sv_idx == 0) { atomic_ref{ prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho_ptr[a]; } diff --git a/include/plssvm/classification_report.hpp b/include/plssvm/classification_report.hpp index e1137dacb..eed8fc84b 100644 --- a/include/plssvm/classification_report.hpp +++ b/include/plssvm/classification_report.hpp @@ -160,8 +160,6 @@ class classification_report { /// The number of floating point digits printed in the classification report output. int output_digits_{ 2 }; - /// Flag, whether the micro average or the accuracy should be printed in the classification report output. - bool use_micro_average_{ false }; /// The used zero division behavior. zero_division_behavior zero_div_{ zero_division_behavior::warn }; }; diff --git a/include/plssvm/core.hpp b/include/plssvm/core.hpp index 1e7c73f3c..6ec7773c4 100644 --- a/include/plssvm/core.hpp +++ b/include/plssvm/core.hpp @@ -32,6 +32,7 @@ #include "plssvm/matrix.hpp" // a custom matrix class #include "plssvm/model/classification_model.hpp" // the model as a result of training a C-SVC #include "plssvm/model/regression_model.hpp" // the model as a result of training a C-SVR +#include "plssvm/mpi/communicator.hpp" // PLSSVM MPI communicator wrapper #include "plssvm/parameter.hpp" // the C-SVM parameter #include "plssvm/regression_report.hpp" // reports different metrics (e.g., mean squared error or R^2 score) for the regression task after scoring #include "plssvm/shape.hpp" // shape for a matrix or device pointer @@ -66,6 +67,12 @@ namespace plssvm::detail::io { } /// Namespace containing implementation details for the command line interface functionality. **Should not** directly be used by users. namespace plssvm::detail::cmd { } +/// Namespace containing MPI wrapper functionality. +namespace plssvm::mpi { } + +/// Namespace containing implementation details for our MPI wrapper functionality. **Should not** directly be used by users. 
+namespace plssvm::mpi::detail { } + /// Namespace containing implementation details for the performance tracking and hardware sampling functionality. **Should not** directly be used by users. namespace plssvm::detail::tracking { } diff --git a/include/plssvm/csvm_factory.hpp b/include/plssvm/csvm_factory.hpp index 49e08cd3e..a59f45c86 100644 --- a/include/plssvm/csvm_factory.hpp +++ b/include/plssvm/csvm_factory.hpp @@ -69,7 +69,8 @@ namespace detail { /** * @brief Construct a C-SVM using the parameters @p args. * @details The default case, no special parameters for the C-SVMs are necessary. - * @tparam csvm_type the type of the C-SVM + * @tparam base_csvm_type the type of the C-SVM base class + * @tparam backend_csvm_type the type of the C-SVM backend specific class * @tparam Args the types of the parameters to initialize the C-SVM * @param[in] args the parameters used to initialize the C-SVM * @throws plssvm::unsupported_backend_exception if the @p backend is not recognized diff --git a/include/plssvm/data_set/classification_data_set.hpp b/include/plssvm/data_set/classification_data_set.hpp index 460b6088e..d792163c8 100644 --- a/include/plssvm/data_set/classification_data_set.hpp +++ b/include/plssvm/data_set/classification_data_set.hpp @@ -17,13 +17,14 @@ #include "plssvm/data_set/data_set.hpp" // plssvm::data_set #include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_list.hpp" // plssvm::detail::{supported_label_types_classification, tuple_contains_v} #include "plssvm/detail/utility.hpp" // plssvm::detail::contains #include "plssvm/exceptions/exceptions.hpp" // 
plssvm::data_set_exception #include "plssvm/file_format_types.hpp" // plssvm::file_format_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -82,104 +83,450 @@ class classification_data_set : public data_set { class label_mapper; /** - * @copydoc plssvm::data_set::data_set(const std::string &) + * @brief Read the data points from the file @p filename. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. + * @param[in] filename the file to read the data points from + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ explicit classification_data_set(const std::string &filename) : - base_data_set{ filename } { this->init(); } + base_data_set{ mpi::communicator{}, filename } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::string &, file_format_type) + * @brief Read the data points from the file @p filename. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + */ + classification_data_set(mpi::communicator comm, const std::string &filename) : + base_data_set{ std::move(comm), filename } { this->init(); } + + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. 
+ * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ classification_data_set(const std::string &filename, file_format_type format) : - base_data_set{ filename, format } { this->init(); } + base_data_set{ mpi::communicator{}, filename, format } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::string &, min_max_scaler) + * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + */ + classification_data_set(mpi::communicator comm, const std::string &filename, file_format_type format) : + base_data_set{ std::move(comm), filename, format } { this->init(); } + + /** + * @brief Read the data points from the file @p filename and scale it using the provided @p scaler. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. 
+ * @param[in] filename the file to read the data points from + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ classification_data_set(const std::string &filename, min_max_scaler scaler) : - base_data_set{ filename, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, filename, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::string &, file_format_type, min_max_scaler) + * @brief Read the data points from the file @p filename and scale it using the provided @p scaler. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + classification_data_set(mpi::communicator comm, const std::string &filename, min_max_scaler scaler) : + base_data_set{ std::move(comm), filename, std::move(scaler) } { this->init(); } + + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and + * scale it using the provided @p scaler. 
+ * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ classification_data_set(const std::string &filename, file_format_type format, min_max_scaler scaler) : - base_data_set{ filename, format, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, filename, format, std::move(scaler) } { this->init(); } + + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and + * scale it using the provided @p scaler. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + classification_data_set(mpi::communicator comm, const std::string &filename, file_format_type format, min_max_scaler scaler) : + base_data_set{ std::move(comm), filename, format, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. 
+ * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features */ explicit classification_data_set(const std::vector> &data_points) : - base_data_set{ data_points } { this->init(); } + base_data_set{ mpi::communicator{}, data_points } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + */ + classification_data_set(mpi::communicator comm, const std::vector> &data_points) : + base_data_set{ std::move(comm), data_points } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &, std::vector) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ classification_data_set(const std::vector> &data_points, std::vector labels) : - base_data_set{ data_points, std::move(labels) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &, min_max_scaler) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + classification_data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels) : + base_data_set{ std::move(comm), data_points, std::move(labels) } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scaler. 
+ * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ classification_data_set(const std::vector> &data_points, min_max_scaler scaler) : - base_data_set{ data_points, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &, std::vector, min_max_scaler) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + classification_data_set(mpi::communicator comm, const std::vector> &data_points, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scaler. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ classification_data_set(const std::vector> &data_points, std::vector labels, min_max_scaler scaler) : - base_data_set{ data_points, std::move(labels), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels), std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + classification_data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(labels), std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &) + * @brief Create a new data set from the provided @p data_points. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features */ template explicit classification_data_set(const matrix &data_points) : - base_data_set{ data_points } { this->init(); } + base_data_set{ mpi::communicator{}, data_points } { this->init(); } + + /** + * @brief Create a new data set from the provided @p data_points. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. + * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + */ + template + classification_data_set(mpi::communicator comm, const matrix &data_points) : + base_data_set{ std::move(comm), data_points } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &, std::vector) + * @brief Create a new data set from the provided @p data_points and @p labels. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ template classification_data_set(const matrix &data_points, std::vector labels) : - base_data_set{ data_points, std::move(labels) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &, min_max_scaler) + * @brief Create a new data set from the provided @p data_points and @p labels. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + template + classification_data_set(mpi::communicator comm, const matrix &data_points, std::vector labels) : + base_data_set{ std::move(comm), data_points, std::move(labels) } { this->init(); } + + /** + * @brief Create a new data set from the the provided @p data_points and scale them using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ template classification_data_set(const matrix &data_points, min_max_scaler scaler) : - base_data_set{ data_points, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &, std::vector, min_max_scaler) + * @brief Create a new data set from the the provided @p data_points and scale them using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + template + classification_data_set(mpi::communicator comm, const matrix &data_points, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set from the the provided @p data_points and @p labels and scale the @p data_points using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ template classification_data_set(const matrix &data_points, std::vector labels, min_max_scaler scaler) : - base_data_set{ data_points, std::move(labels), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels), std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set from the the provided @p data_points and @p labels and scale the @p data_points using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + template + classification_data_set(mpi::communicator comm, const matrix &data_points, std::vector labels, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(labels), std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&) + * @brief Use the provided @p data_points in this data set. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong */ explicit classification_data_set(soa_matrix &&data_points) : - base_data_set{ std::move(data_points) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points) } { this->init(); } + + /** + * @brief Use the provided @p data_points in this data set. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + */ + classification_data_set(mpi::communicator comm, soa_matrix &&data_points) : + base_data_set{ std::move(comm), std::move(data_points) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&, std::vector &&) + * @brief Use the provided @p data_points and @p labels in this data set. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ classification_data_set(soa_matrix &&data_points, std::vector &&labels) : - base_data_set{ std::move(data_points), std::move(labels) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points), std::move(labels) } { this->init(); } + + /** + * @brief Use the provided @p data_points and @p labels in this data set. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + classification_data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels) : + base_data_set{ std::move(comm), std::move(data_points), std::move(labels) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&, min_max_scaler) + * @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ classification_data_set(soa_matrix &&data_points, min_max_scaler scaler) : - base_data_set{ std::move(data_points), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points), std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&, std::vector &&, min_max_scaler) + * @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + classification_data_set(mpi::communicator comm, soa_matrix &&data_points, min_max_scaler scaler) : + base_data_set{ std::move(comm), std::move(data_points), std::move(scaler) } { this->init(); } + + /** + * @brief Use the provided @p data_points and @p labels in this data set and scale them using the provided @p scaler. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ classification_data_set(soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler) : - base_data_set{ std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); } + + /** + * @brief Use the provided @p data_points and @p labels in this data set and scale them using the provided @p scaler. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + classification_data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler) : + base_data_set{ std::move(comm), std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); } /** * @copydoc plssvm::data_set::save @@ -328,12 +675,14 @@ void classification_data_set::init() { PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "data_set_create", "type", "classification" })); if (this->has_labels()) { detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Created a classification data set with {} data points, {} features, and {} classes.\n", detail::tracking::tracking_entry{ "data_set_create", "num_data_points", this->num_data_points() }, detail::tracking::tracking_entry{ "data_set_create", "num_features", this->num_features() }, detail::tracking::tracking_entry{ "data_set_create", "num_classes", 
this->num_classes() }); } else { detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Created a classification data set with {} data points and {} features.\n", detail::tracking::tracking_entry{ "data_set_create", "num_data_points", this->num_data_points() }, detail::tracking::tracking_entry{ "data_set_create", "num_features", this->num_features() }); @@ -349,6 +698,7 @@ void classification_data_set::save(const std::string &filename, const file_fo const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Write {} classification data points with {} features and {} classes in {} to the {} file '{}'.\n", detail::tracking::tracking_entry{ "data_set_write", "num_data_points", this->num_data_points() }, detail::tracking::tracking_entry{ "data_set_write", "num_features", this->num_features() }, diff --git a/include/plssvm/data_set/data_set.hpp b/include/plssvm/data_set/data_set.hpp index bc5464544..d448e61cb 100644 --- a/include/plssvm/data_set/data_set.hpp +++ b/include/plssvm/data_set/data_set.hpp @@ -13,29 +13,23 @@ #define PLSSVM_DATA_SET_DATA_SET_HPP_ #pragma once -#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE -#include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/io/arff_parsing.hpp" // plssvm::detail::io::write_libsvm_data -#include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader -#include "plssvm/detail/io/libsvm_parsing.hpp" // plssvm::detail::io::write_arff_data -#include "plssvm/detail/logging.hpp" // plssvm::detail::log -#include "plssvm/detail/string_utility.hpp" // plssvm::detail::ends_with -#include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry -#include "plssvm/detail/utility.hpp" // plssvm::detail::contains -#include 
"plssvm/exceptions/exceptions.hpp" // plssvm::data_set_exception -#include "plssvm/file_format_types.hpp" // plssvm::file_format_type -#include "plssvm/matrix.hpp" // plssvm::soa_matrix -#include "plssvm/shape.hpp" // plssvm::shape -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level +#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE +#include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler +#include "plssvm/detail/io/arff_parsing.hpp" // plssvm::detail::io::write_libsvm_data +#include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader +#include "plssvm/detail/io/libsvm_parsing.hpp" // plssvm::detail::io::write_arff_data +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::ends_with +#include "plssvm/exceptions/exceptions.hpp" // plssvm::data_set_exception, plssvm::mpi_exception +#include "plssvm/file_format_types.hpp" // plssvm::file_format_type +#include "plssvm/matrix.hpp" // plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/shape.hpp" // plssvm::shape #include "fmt/format.h" // fmt::format #include // std::max, std::min, std::sort, std::adjacent_find -#include // std::chrono::{time_point, steady_clock, duration_cast, millisecond} #include // std::size_t #include // std::reference_wrapper, std::cref -#include // std::numeric_limits::{max, lowest} #include // std::shared_ptr, std::make_shared #include // std::optional, std::make_optional, std::nullopt #include // std::string @@ -70,49 +64,57 @@ class data_set { * @brief Read the data points from the file @p filename. * Automatically determines the plssvm::file_format_type based on the file extension. * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] filename the file to read the data points from * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ - explicit data_set(const std::string &filename); + data_set(mpi::communicator comm, const std::string &filename); /** * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] filename the file to read the data points from * @param[in] format the assumed file format used to parse the data points * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ - data_set(const std::string &filename, file_format_type format); + data_set(mpi::communicator comm, const std::string &filename, file_format_type format); /** * @brief Read the data points from the file @p filename and scale it using the provided @p scaler. * Automatically determines the plssvm::file_format_type based on the file extension. * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] filename the file to read the data points from * @param[in] scaler the parameters used to scale the data set feature values to a given range * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ - data_set(const std::string &filename, min_max_scaler scaler); + data_set(mpi::communicator comm, const std::string &filename, min_max_scaler scaler); /** * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and * scale it using the provided @p scaler. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] filename the file to read the data points from * @param[in] format the assumed file format used to parse the data points * @param[in] scaler the parameters used to scale the data set feature values to a given range * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ - data_set(const std::string &filename, file_format_type format, min_max_scaler scaler); + data_set(mpi::communicator comm, const std::string &filename, file_format_type format, min_max_scaler scaler); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. 
* @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @throws plssvm::data_set_exception if the @p data_points vector is empty * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features * @throws plssvm::data_set_exception if any @p data_point has no features */ - explicit data_set(const std::vector> &data_points); + data_set(mpi::communicator comm, const std::vector> &data_points); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] labels the labels used in this data set * @throws plssvm::data_set_exception if the @p data_points vector is empty @@ -120,19 +122,22 @@ class data_set { * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ - data_set(const std::vector> &data_points, std::vector labels); + data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] scaler the parameters used to scale the data set feature values to a given range * @throws plssvm::data_set_exception if the @p data_points vector is empty * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ - data_set(const std::vector> &data_points, min_max_scaler scaler); + data_set(mpi::communicator comm, const std::vector> &data_points, min_max_scaler scaler); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] labels the labels used in this data set * @param[in] scaler the parameters used to scale the data set feature values to a given range @@ -141,25 +146,28 @@ class data_set { * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ - data_set(const std::vector> &data_points, std::vector labels, min_max_scaler scaler); + data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels, min_max_scaler scaler); /** * @brief Create a new data set from the provided @p data_points. * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @throws plssvm::data_set_exception if the @p data_points vector is empty * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features * @throws plssvm::data_set_exception if any @p data_point has no features */ template - explicit data_set(const matrix &data_points); + data_set(mpi::communicator comm, const matrix &data_points); /** * @brief Create a new data set from the provided @p data_points and @p labels. 
* @note If the provided matrix isn't padded, adds the necessary padding entries automatically. * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] labels the labels used in this data set * @throws plssvm::data_set_exception if the @p data_points vector is empty @@ -168,24 +176,27 @@ class data_set { * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ template - data_set(const matrix &data_points, std::vector labels); + data_set(mpi::communicator comm, const matrix &data_points, std::vector labels); /** * @brief Create a new data set from the the provided @p data_points and scale them using the provided @p scaler. * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] scaler the parameters used to scale the data set feature values to a given range * @throws plssvm::data_set_exception if the @p data_points vector is empty * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ template - data_set(const matrix &data_points, min_max_scaler scaler); + data_set(mpi::communicator comm, const matrix &data_points, min_max_scaler scaler); /** * @brief Create a new data set from 
the the provided @p data_points and @p labels and scale the @p data_points using the provided @p scaler. * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] labels the labels used in this data set * @param[in] scaler the parameters used to scale the data set feature values to a given range @@ -194,24 +205,27 @@ class data_set { * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ template - data_set(const matrix &data_points, std::vector labels, min_max_scaler scaler); + data_set(mpi::communicator comm, const matrix &data_points, std::vector labels, min_max_scaler scaler); /** * @brief Use the provided @p data_points in this data set. * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @throws plssvm::data_set_exception if the @p data_points vector is empty * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong */ - explicit data_set(soa_matrix &&data_points); + data_set(mpi::communicator comm, soa_matrix &&data_points); /** * @brief Use the provided @p data_points and @p labels in this data set. * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] labels the labels used in this data set * @throws plssvm::data_set_exception if the @p data_points vector is empty @@ -220,11 +234,12 @@ class data_set { * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ - data_set(soa_matrix &&data_points, std::vector &&labels); + data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels); /** * @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler. * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] scaler the parameters used to scale the data set feature values to a given range * @throws plssvm::data_set_exception if the @p data_points vector is empty @@ -232,11 +247,13 @@ class data_set { * @throws plssvm::data_set_exception if any @p data_point has no features * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ - data_set(soa_matrix &&data_points, min_max_scaler scaler); + data_set(mpi::communicator comm, soa_matrix &&data_points, min_max_scaler scaler); /** * @brief Use the provided @p data_points and @p labels in this data set and scale them using the provided @p scaler. * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) * @param[in] data_points the data points used in this data set * @param[in] labels the labels used in this data set * @param[in] scaler the parameters used to scale the data set feature values to a given range @@ -246,8 +263,9 @@ class data_set { * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical */ - data_set(soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler); + data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler); /** * @brief Default copy constructor. @@ -275,12 +293,14 @@ class data_set { /** * @brief Save the data points and potential labels of this data set to the file @p filename using the file @p format type. + * @note Only the main MPI rank (traditionally rank 0) saves the whole data set (if MPI is available). * @param[in] filename the file to save the data points and labels to * @param[in] format the file format */ virtual void save(const std::string &filename, file_format_type format) const; /** * @brief Save the data points and potential labels of this data set to the file @p filename. + * @note Only the main MPI rank (traditionally rank 0) saves the whole data set (if MPI is available). * @details Automatically determines the plssvm::file_format_type based on the file extension. * If the file extension isn't `.arff`, saves the data as `.libsvm` file. 
* @param[in] filename the file to save the data points and labels to @@ -333,6 +353,14 @@ class data_set { */ [[nodiscard]] optional_ref scaling_factors() const noexcept; + /** + * @brief Get the associated MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const mpi::communicator &communicator() const noexcept { + return comm_; + } + protected: /** * @brief Default construct an empty data set. @@ -360,6 +388,9 @@ class data_set { /// The number of features in this data set. size_type num_features_{ 0 }; + /// The used MPI communicator. + mpi::communicator comm_{}; + /// A pointer to the two-dimensional data points. std::shared_ptr> data_ptr_{ nullptr }; /// A pointer to the original labels of this data set; may be `nullptr` if no labels have been provided. @@ -376,61 +407,73 @@ class data_set { //*************************************************************************************************************************************// template -data_set::data_set(const std::string &filename) { +data_set::data_set(mpi::communicator comm, const std::string &filename) : + comm_{ std::move(comm) } { // read data set from file // if the file doesn't end with .arff, assume a LIBSVM file this->read_file(filename, detail::ends_with(filename, ".arff") ? 
file_format_type::arff : file_format_type::libsvm); } template -data_set::data_set(const std::string &filename, const file_format_type format) { +data_set::data_set(mpi::communicator comm, const std::string &filename, const file_format_type format) : + comm_{ std::move(comm) } { // read data set from file this->read_file(filename, format); } template -data_set::data_set(const std::string &filename, min_max_scaler scale_parameter) : - data_set{ filename } { +data_set::data_set(mpi::communicator comm, const std::string &filename, min_max_scaler scaler) : + data_set{ std::move(comm), filename } { + // check whether the data set and scaler MPI communicators are identical + if (comm != scaler.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the data set and scaler must be identical!" }; + } + // initialize scaling - scaler_ = std::make_shared(std::move(scale_parameter)); + scaler_ = std::make_shared(std::move(scaler)); // scale data set scaler_->scale(*data_ptr_); } template -data_set::data_set(const std::string &filename, file_format_type format, min_max_scaler scale_parameter) : - data_set{ filename, format } { +data_set::data_set(mpi::communicator comm, const std::string &filename, file_format_type format, min_max_scaler scaler) : + data_set{ std::move(comm), filename, format } { + // check whether the data set and scaler MPI communicators are identical + if (comm != scaler.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the data set and scaler must be identical!" 
}; + } + // initialize scaling - scaler_ = std::make_shared(std::move(scale_parameter)); + scaler_ = std::make_shared(std::move(scaler)); // scale data set scaler_->scale(*data_ptr_); } // clang-format off template -data_set::data_set(const std::vector> &data_points) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } } } {} +data_set::data_set(mpi::communicator comm, const std::vector> &data_points) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } } } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } template -data_set::data_set(const std::vector> &data_points, std::vector labels) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels) } {} +data_set::data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels) } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } template -data_set::data_set(const std::vector> &data_points, min_max_scaler scale_parameter) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(scale_parameter) } {} +data_set::data_set(mpi::communicator comm, const std::vector> &data_points, min_max_scaler scaler) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(scaler) } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } template -data_set::data_set(const std::vector> &data_points, std::vector labels, min_max_scaler scale_parameter) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels), std::move(scale_parameter) } {} +data_set::data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels, min_max_scaler scaler) try : + data_set{ std::move(comm), 
soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels), std::move(scaler) } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } @@ -439,9 +482,10 @@ data_set::data_set(const std::vector> &data_points, st template template -data_set::data_set(const matrix &data_points) : +data_set::data_set(mpi::communicator comm, const matrix &data_points) : num_data_points_{ data_points.num_rows() }, num_features_{ data_points.num_cols() }, + comm_{ std::move(comm) }, data_ptr_{ std::make_shared>(data_points, shape{ PADDING_SIZE, PADDING_SIZE }) } { // the provided data points vector may not be empty if (data_ptr_->num_rows() == 0) { @@ -454,9 +498,10 @@ data_set::data_set(const matrix &data_points) : template template -data_set::data_set(const matrix &data_points, std::vector labels) : +data_set::data_set(mpi::communicator comm, const matrix &data_points, std::vector labels) : num_data_points_{ data_points.num_rows() }, num_features_{ data_points.num_cols() }, + comm_{ std::move(comm) }, data_ptr_{ std::make_shared>(data_points, shape{ PADDING_SIZE, PADDING_SIZE }) }, labels_ptr_{ std::make_shared>(std::move(labels)) } { // the provided data points vector may not be empty @@ -474,28 +519,39 @@ data_set::data_set(const matrix &data_points, std::vector< template template -data_set::data_set(const matrix &data_points, min_max_scaler scale_parameter) : - data_set{ data_points } { +data_set::data_set(mpi::communicator comm, const matrix &data_points, min_max_scaler scaler) : + data_set{ std::move(comm), data_points } { + // check whether the data set and scaler MPI communicators are identical + if (comm != scaler.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the data set and scaler must be identical!" 
}; + } + // initialize scaling - scaler_ = std::make_shared(std::move(scale_parameter)); + scaler_ = std::make_shared(std::move(scaler)); // scale data set scaler_->scale(*data_ptr_); } template template -data_set::data_set(const matrix &data_points, std::vector labels, min_max_scaler scale_parameter) : - data_set{ data_points, std::move(labels) } { +data_set::data_set(mpi::communicator comm, const matrix &data_points, std::vector labels, min_max_scaler scaler) : + data_set{ std::move(comm), data_points, std::move(labels) } { + // check whether the data set and scaler MPI communicators are identical + if (comm != scaler.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the data set and scaler must be identical!" }; + } + // initialize scaling - scaler_ = std::make_shared(std::move(scale_parameter)); + scaler_ = std::make_shared(std::move(scaler)); // scale data set scaler_->scale(*data_ptr_); } template -data_set::data_set(soa_matrix &&data_points) : +data_set::data_set(mpi::communicator comm, soa_matrix &&data_points) : num_data_points_{ data_points.num_rows() }, num_features_{ data_points.num_cols() }, + comm_{ std::move(comm) }, data_ptr_{ std::make_shared>(std::move(data_points)) } { // the provided data points vector may not be empty if (data_ptr_->num_rows() == 0) { @@ -511,9 +567,10 @@ data_set::data_set(soa_matrix &&data_points) : } template -data_set::data_set(soa_matrix &&data_points, std::vector &&labels) : +data_set::data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels) : num_data_points_{ data_points.num_rows() }, num_features_{ data_points.num_cols() }, + comm_{ std::move(comm) }, data_ptr_{ std::make_shared>(std::move(data_points)) }, labels_ptr_{ std::make_shared>(std::move(labels)) } { // the provided data points vector may not be empty @@ -534,19 +591,29 @@ data_set::data_set(soa_matrix &&data_points, std::vector -data_set::data_set(soa_matrix &&data_points, min_max_scaler scale_parameter) : - 
data_set{ std::move(data_points) } { +data_set::data_set(mpi::communicator comm, soa_matrix &&data_points, min_max_scaler scaler) : + data_set{ std::move(comm), std::move(data_points) } { + // check whether the data set and scaler MPI communicators are identical + if (comm != scaler.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the data set and scaler must be identical!" }; + } + // initialize scaling - scaler_ = std::make_shared(std::move(scale_parameter)); + scaler_ = std::make_shared(std::move(scaler)); // scale data set scaler_->scale(*data_ptr_); } template -data_set::data_set(soa_matrix &&data_points, std::vector &&labels, min_max_scaler scale_parameter) : - data_set{ std::move(data_points), std::move(labels) } { +data_set::data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler) : + data_set{ std::move(comm), std::move(data_points), std::move(labels) } { + // check whether the data set and scaler MPI communicators are identical + if (comm != scaler.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the data set and scaler must be identical!" 
}; + } + // initialize scaling - scaler_ = std::make_shared(std::move(scale_parameter)); + scaler_ = std::make_shared(std::move(scaler)); // scale data set scaler_->scale(*data_ptr_); } @@ -554,25 +621,27 @@ data_set::data_set(soa_matrix &&data_points, std::vector void data_set::save(const std::string &filename, const file_format_type format) const { // save the data set - if (this->has_labels()) { - // save data with labels - switch (format) { - case file_format_type::libsvm: - detail::io::write_libsvm_data(filename, *data_ptr_, *labels_ptr_); - break; - case file_format_type::arff: - detail::io::write_arff_data(filename, *data_ptr_, *labels_ptr_); - break; - } - } else { - // save data without labels - switch (format) { - case file_format_type::libsvm: - detail::io::write_libsvm_data(filename, *data_ptr_); - break; - case file_format_type::arff: - detail::io::write_arff_data(filename, *data_ptr_); - break; + if (comm_.is_main_rank()) { + if (this->has_labels()) { + // save data with labels + switch (format) { + case file_format_type::libsvm: + detail::io::write_libsvm_data(filename, *data_ptr_, *labels_ptr_); + break; + case file_format_type::arff: + detail::io::write_arff_data(filename, *data_ptr_, *labels_ptr_); + break; + } + } else { + // save data without labels + switch (format) { + case file_format_type::libsvm: + detail::io::write_libsvm_data(filename, *data_ptr_); + break; + case file_format_type::arff: + detail::io::write_arff_data(filename, *data_ptr_); + break; + } } } } diff --git a/include/plssvm/data_set/min_max_scaler.hpp b/include/plssvm/data_set/min_max_scaler.hpp index 50ed89d5a..7935d5c96 100644 --- a/include/plssvm/data_set/min_max_scaler.hpp +++ b/include/plssvm/data_set/min_max_scaler.hpp @@ -14,12 +14,11 @@ #pragma once #include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader -#include "plssvm/detail/io/scaling_factors_parsing.hpp" // 
plssvm::detail::io::parse_scaling_factors -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking_entry #include "plssvm/exceptions/exceptions.hpp" // plssvm::min_max_scaler_exception #include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "fmt/format.h" // fmt::format @@ -27,11 +26,10 @@ #include // std::min, std::max, std::sort, std::adjacent_find #include // std::chrono::{time_point, steady_clock, duration_cast, milliseconds} #include // std::size_t -#include // std::numeric_limits::{min, max} +#include // std::numeric_limits::{max, lowest} #include // std::optional, std::make_optional, std::nullopt #include // std::string -#include // std::tie -#include // std::pair, std::make_pair +#include // std::pair #include // std::vector namespace plssvm { @@ -43,6 +41,7 @@ class min_max_scaler { public: /** * @brief The calculated or read feature-wise scaling factors. + * @details Note that the feature indices are zero-based and not one-based. */ struct factors { /// The used size type. @@ -79,12 +78,28 @@ class min_max_scaler { * @throws plssvm::data_set_exception if lower is greater or equal than upper */ min_max_scaler(real_type lower, real_type upper); + /** + * @brief Create a new scaling class that can be used to scale all features of a data set to the interval [lower, upper]. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] lower the lower bound value of all features + * @param[in] upper the upper bound value of all features + * @throws plssvm::data_set_exception if lower is greater or equal than upper + */ + min_max_scaler(mpi::communicator comm, real_type lower, real_type upper); + /** * @brief Read the scaling interval and factors from the provided file @p filename. * @param[in] filename the filename to read the scaling information from * @throws plssvm::invalid_file_format_exception all exceptions thrown by the plssvm::detail::io::parse_scaling_factors function */ min_max_scaler(const std::string &filename); // can't be explicit due to the data_set_variant + /** + * @brief Read the scaling interval and factors from the provided file @p filename. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the filename to read the scaling information from + * @throws plssvm::invalid_file_format_exception all exceptions thrown by the plssvm::detail::io::parse_scaling_factors function + */ + min_max_scaler(mpi::communicator comm, const std::string &filename); // can't be explicit due to the data_set_variant /** * @brief Save the scaling factors to the file @p filename. @@ -126,42 +141,23 @@ class min_max_scaler { } } + /** + * @brief Get the associated MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const mpi::communicator &communicator() const noexcept { + return comm_; + } + private: /// The user-provided scaling interval. After scaling, all feature values are scaled to [lower, upper]. std::pair scaling_interval_{}; /// The scaling factors for all features. 
std::vector scaling_factors_{}; -}; - -inline min_max_scaler::min_max_scaler(const real_type lower, const real_type upper) : - scaling_interval_{ std::make_pair(lower, upper) } { - if (lower >= upper) { - throw min_max_scaler_exception{ fmt::format("Inconsistent scaling interval specification: lower ({}) must be less than upper ({})!", lower, upper) }; - } -} - -inline min_max_scaler::min_max_scaler(const std::string &filename) { - // open the file - detail::io::file_reader reader{ filename }; - reader.read_lines('#'); - - // read scaling values from file - std::tie(scaling_interval_, scaling_factors_) = detail::io::parse_scaling_factors(reader); -} -inline void min_max_scaler::save(const std::string &filename) const { - const std::chrono::time_point start_time = std::chrono::steady_clock::now(); - - // write scaling values to file - detail::io::write_scaling_factors(filename, scaling_interval_, scaling_factors_); - - const std::chrono::time_point end_time = std::chrono::steady_clock::now(); - detail::log(verbosity_level::full | verbosity_level::timing, - "Write {} scaling factors in {} to the file '{}'.\n", - detail::tracking::tracking_entry{ "scaling_factors_write", "num_scaling_factors", scaling_factors_.size() }, - detail::tracking::tracking_entry{ "scaling_factors_write", "time", std::chrono::duration_cast(end_time - start_time) }, - detail::tracking::tracking_entry{ "scaling_factors_write", "filename", filename }); -} + /// The used MPI communicator. 
+ mpi::communicator comm_{}; +}; template void min_max_scaler::scale(plssvm::matrix &data) { @@ -204,7 +200,7 @@ void min_max_scaler::scale(plssvm::matrix &data) { std::sort(scaling_factors_.begin(), scaling_factors_.end(), scaling_factors_comp_less); // check whether the biggest feature index is smaller than the number of features if (scaling_factors_.back().feature >= num_features) { - throw min_max_scaler_exception{ fmt::format("The maximum scaling feature index most not be greater than {}, but is {}!", num_features - 1, scaling_factors_.back().feature) }; + throw min_max_scaler_exception{ fmt::format("The maximum scaling feature index most not be greater or equal than {}, but is {}!", num_features, scaling_factors_.back().feature) }; } // check that there are no duplicate entries const auto scaling_factors_comp_eq = [](const factors &lhs, const factors &rhs) { return lhs.feature == rhs.feature; }; @@ -227,6 +223,7 @@ void min_max_scaler::scale(plssvm::matrix &data) { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Scaled the data set to the range [{}, {}] in {}.\n", detail::tracking::tracking_entry{ "data_set_scale", "lower", lower }, detail::tracking::tracking_entry{ "data_set_scale", "upper", upper }, diff --git a/include/plssvm/data_set/regression_data_set.hpp b/include/plssvm/data_set/regression_data_set.hpp index 45c3cfe75..4e9831fb5 100644 --- a/include/plssvm/data_set/regression_data_set.hpp +++ b/include/plssvm/data_set/regression_data_set.hpp @@ -17,11 +17,12 @@ #include "plssvm/data_set/data_set.hpp" // plssvm::data_set #include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // 
plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_list.hpp" // plssvm::detail::{supported_label_types_regression, tuple_contains_v} #include "plssvm/file_format_types.hpp" // plssvm::file_format_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -75,104 +76,450 @@ class regression_data_set : public data_set { using svm_fit_type = ::plssvm::csvr; /** - * @copydoc plssvm::data_set::data_set(const std::string &) + * @brief Read the data points from the file @p filename. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. + * @param[in] filename the file to read the data points from + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ explicit regression_data_set(const std::string &filename) : - base_data_set{ filename } { this->init(); } + base_data_set{ mpi::communicator{}, filename } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::string &, file_format_type) + * @brief Read the data points from the file @p filename. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + */ + regression_data_set(mpi::communicator comm, const std::string &filename) : + base_data_set{ std::move(comm), filename } { this->init(); } + + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ regression_data_set(const std::string &filename, file_format_type format) : - base_data_set{ filename, format } { this->init(); } + base_data_set{ mpi::communicator{}, filename, format } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::string &, min_max_scaler) + * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + */ + regression_data_set(mpi::communicator comm, const std::string &filename, file_format_type format) : + base_data_set{ std::move(comm), filename, format } { this->init(); } + + /** + * @brief Read the data points from the file @p filename and scale it using the provided @p scaler. + * Automatically determines the plssvm::file_format_type based on the file extension. 
+ * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. + * @param[in] filename the file to read the data points from + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ regression_data_set(const std::string &filename, min_max_scaler scaler) : - base_data_set{ filename, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, filename, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::string &, file_format_type, min_max_scaler) + * @brief Read the data points from the file @p filename and scale it using the provided @p scaler. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + regression_data_set(mpi::communicator comm, const std::string &filename, min_max_scaler scaler) : + base_data_set{ std::move(comm), filename, std::move(scaler) } { this->init(); } + + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and + * scale it using the provided @p scaler. + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ regression_data_set(const std::string &filename, file_format_type format, min_max_scaler scaler) : - base_data_set{ filename, format, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, filename, format, std::move(scaler) } { this->init(); } + + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and + * scale it using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + regression_data_set(mpi::communicator comm, const std::string &filename, file_format_type format, min_max_scaler scaler) : + base_data_set{ std::move(comm), filename, format, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features */ explicit regression_data_set(const std::vector> &data_points) : - base_data_set{ data_points } { this->init(); } + base_data_set{ mpi::communicator{}, data_points } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + */ + regression_data_set(mpi::communicator comm, const std::vector> &data_points) : + base_data_set{ std::move(comm), data_points } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &, std::vector) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ regression_data_set(const std::vector> &data_points, std::vector labels) : - base_data_set{ data_points, std::move(labels) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &, min_max_scaler) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + regression_data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels) : + base_data_set{ std::move(comm), data_points, std::move(labels) } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scaler. + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ regression_data_set(const std::vector> &data_points, min_max_scaler scaler) : - base_data_set{ data_points, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const std::vector> &, std::vector, min_max_scaler) + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + regression_data_set(mpi::communicator comm, const std::vector> &data_points, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scaler. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ regression_data_set(const std::vector> &data_points, std::vector labels, min_max_scaler scaler) : - base_data_set{ data_points, std::move(labels), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels), std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scaler. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + regression_data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(labels), std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &) + * @brief Create a new data set from the provided @p data_points. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features */ template explicit regression_data_set(const matrix &data_points) : - base_data_set{ data_points } { this->init(); } + base_data_set{ mpi::communicator{}, data_points } { this->init(); } + + /** + * @brief Create a new data set from the provided @p data_points. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. + * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + */ + template + regression_data_set(mpi::communicator comm, const matrix &data_points) : + base_data_set{ std::move(comm), data_points } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &, std::vector) + * @brief Create a new data set from the provided @p data_points and @p labels. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ template regression_data_set(const matrix &data_points, std::vector labels) : - base_data_set{ data_points, std::move(labels) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &, min_max_scaler) + * @brief Create a new data set from the provided @p data_points and @p labels. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + template + regression_data_set(mpi::communicator comm, const matrix &data_points, std::vector labels) : + base_data_set{ std::move(comm), data_points, std::move(labels) } { this->init(); } + + /** + * @brief Create a new data set from the the provided @p data_points and scale them using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ template regression_data_set(const matrix &data_points, min_max_scaler scaler) : - base_data_set{ data_points, std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(const matrix &, std::vector, min_max_scaler) + * @brief Create a new data set from the the provided @p data_points and scale them using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + template + regression_data_set(mpi::communicator comm, const matrix &data_points, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set from the the provided @p data_points and @p labels and scale the @p data_points using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ template regression_data_set(const matrix &data_points, std::vector labels, min_max_scaler scaler) : - base_data_set{ data_points, std::move(labels), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, data_points, std::move(labels), std::move(scaler) } { this->init(); } + + /** + * @brief Create a new data set from the the provided @p data_points and @p labels and scale the @p data_points using the provided @p scaler. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + template + regression_data_set(mpi::communicator comm, const matrix &data_points, std::vector labels, min_max_scaler scaler) : + base_data_set{ std::move(comm), data_points, std::move(labels), std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&) + * @brief Use the provided @p data_points in this data set. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong */ explicit regression_data_set(soa_matrix &&data_points) : - base_data_set{ std::move(data_points) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points) } { this->init(); } + + /** + * @brief Use the provided @p data_points in this data set. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + */ + regression_data_set(mpi::communicator comm, soa_matrix &&data_points) : + base_data_set{ std::move(comm), std::move(data_points) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&, std::vector &&) + * @brief Use the provided @p data_points and @p labels in this data set. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ regression_data_set(soa_matrix &&data_points, std::vector &&labels) : - base_data_set{ std::move(data_points), std::move(labels) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points), std::move(labels) } { this->init(); } + + /** + * @brief Use the provided @p data_points and @p labels in this data set. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + regression_data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels) : + base_data_set{ std::move(comm), std::move(data_points), std::move(labels) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&, min_max_scaler) + * @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ regression_data_set(soa_matrix &&data_points, min_max_scaler scaler) : - base_data_set{ std::move(data_points), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points), std::move(scaler) } { this->init(); } /** - * @copydoc plssvm::data_set::data_set(soa_matrix &&, std::vector &&, min_max_scaler) + * @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler. + * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvc::fit/plssvm::csvr::fit! + * @note Moves the @p data_points into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + regression_data_set(mpi::communicator comm, soa_matrix &&data_points, min_max_scaler scaler) : + base_data_set{ std::move(comm), std::move(data_points), std::move(scaler) } { this->init(); } + + /** + * @brief Use the provided @p data_points and @p labels in this data set and scale them using the provided @p scaler. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale */ regression_data_set(soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler) : - base_data_set{ std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); } + base_data_set{ mpi::communicator{}, std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); } + + /** + * @brief Use the provided @p data_points and @p labels in this data set and scale them using the provided @p scaler. + * @note Moves the @p data_points and @p labels into this data set. If @p data_points have the wrong padding, a runtime exception is thrown. 
+ * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scaler the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale + * @throws plssvm::mpi_exception if the MPI communicator @p comm and the MPI communicator in @p scaler are not identical + */ + regression_data_set(mpi::communicator comm, soa_matrix &&data_points, std::vector &&labels, min_max_scaler scaler) : + base_data_set{ std::move(comm), std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); } /** * @copydoc plssvm::data_set::save @@ -203,6 +550,7 @@ void regression_data_set::init() { } detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Created a regression data set with {} data points and {} features.\n", detail::tracking::tracking_entry{ "data_set_create", "num_data_points", this->num_data_points() }, detail::tracking::tracking_entry{ "data_set_create", "num_features", this->num_features() }); @@ -217,6 +565,7 @@ void regression_data_set::save(const std::string &filename, const file_format const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Write {} regression 
data points with {} features in {} to the {} file '{}'.\n", detail::tracking::tracking_entry{ "data_set_write", "num_data_points", this->num_data_points() }, detail::tracking::tracking_entry{ "data_set_write", "num_features", this->num_features() }, diff --git a/include/plssvm/detail/assert.hpp b/include/plssvm/detail/assert.hpp index de66c9931..7fb34d0a5 100644 --- a/include/plssvm/detail/assert.hpp +++ b/include/plssvm/detail/assert.hpp @@ -14,12 +14,14 @@ #pragma once #include "plssvm/exceptions/source_location.hpp" // plssvm::source_location +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::{is_active, abort_world} #include "fmt/color.h" // fmt::emphasis, fmt::fg, fmt::color #include "fmt/format.h" // fmt::format #include // std::abort #include // std::cerr, std::endl +#include // std::string #include // std::string_view #include // std::forward @@ -42,19 +44,25 @@ inline void check_assertion(const bool cond, const std::string_view cond_str, co // print assertion error message std::cerr << fmt::format( "Assertion '{}' failed!\n" - " in file {}\n" - " in function {}\n" - " @ line {}\n\n" + "{}" + " in file {}\n" + " in function {}\n" + " @ line {}\n\n" "{}\n", fmt::format(fmt::emphasis::bold | fmt::fg(fmt::color::green), "{}", cond_str), + loc.world_rank().has_value() ? 
fmt::format(" on MPI world rank {}\n", loc.world_rank().value()) : std::string{}, loc.file_name(), loc.function_name(), loc.line(), fmt::format(fmt::emphasis::bold | fmt::fg(fmt::color::red), msg, std::forward(args)...)) << std::endl; - // abort further execution - std::abort(); + // abort further execution -> call MPI_Abort if in an MPI environment + if (mpi::is_active()) { + mpi::abort_world(); + } else { + std::abort(); + } } } diff --git a/include/plssvm/detail/cmd/data_set_variants.hpp b/include/plssvm/detail/cmd/data_set_variants.hpp index ba02144a6..95325ed0e 100644 --- a/include/plssvm/detail/cmd/data_set_variants.hpp +++ b/include/plssvm/detail/cmd/data_set_variants.hpp @@ -22,9 +22,11 @@ #include "plssvm/detail/cmd/parser_scale.hpp" // plssvm::detail::cmd::parser_scale #include "plssvm/detail/cmd/parser_train.hpp" // plssvm::detail::cmd::parser_train #include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm_types.hpp" // plssvm::svm_type, plssvm::svm_type_from_model_file #include // std::string +#include // std::move #include // std::variant namespace plssvm::detail::cmd { @@ -60,32 +62,34 @@ template /** * @brief Create a plssvm::min_max_scaler based on the provided command line arguments. 
+ * @param[in] comm the MPI communicator wrapper * @param[in] cmd_parser the command line arguments * @return the constructed plssvm::min_max_scaler (`[[nodiscard]]`) */ -[[nodiscard]] inline min_max_scaler make_scaling_factors(const cmd::parser_scale &cmd_parser) { +[[nodiscard]] inline min_max_scaler make_scaling_factors(mpi::communicator comm, const cmd::parser_scale &cmd_parser) { if (!cmd_parser.restore_filename.empty()) { - return min_max_scaler{ cmd_parser.restore_filename }; + return min_max_scaler{ std::move(comm), cmd_parser.restore_filename }; } else { - return min_max_scaler{ cmd_parser.lower, cmd_parser.upper }; + return min_max_scaler{ std::move(comm), cmd_parser.lower, cmd_parser.upper }; } } /** * @brief Return the correct data set based on the plssvm::detail::cmd::parser_train command line options. + * @param[in] comm the MPI communicator wrapper * @param[in] cmd_parser the provided command line parser * @return the data set based on the provided command line parser (`[[nodiscard]]`) */ -[[nodiscard]] inline data_set_variants data_set_factory(const cmd::parser_train &cmd_parser) { +[[nodiscard]] inline data_set_variants data_set_factory(mpi::communicator comm, const cmd::parser_train &cmd_parser) { switch (cmd_parser.svm) { case svm_type::csvc: if (cmd_parser.strings_as_labels) { - return make_classification_data_set(cmd_parser.input_filename); + return make_classification_data_set(std::move(comm), cmd_parser.input_filename); } else { - return make_classification_data_set::label_type>(cmd_parser.input_filename); + return make_classification_data_set::label_type>(std::move(comm), cmd_parser.input_filename); } case svm_type::csvr: - return make_regression_data_set::label_type>(cmd_parser.input_filename); + return make_regression_data_set::label_type>(std::move(comm), cmd_parser.input_filename); } // can never be reached ::plssvm::detail::unreachable(); @@ -94,19 +98,20 @@ template /** * @brief Return the correct data set based on the 
plssvm::detail::cmd::parser_predict command line options. * @details Infers the C-SVM type from the provided model file header. + * @param[in] comm the MPI communicator wrapper * @param[in] cmd_parser the provided command line parser * @return the data set based on the provided command line parser (`[[nodiscard]]`) */ -[[nodiscard]] inline data_set_variants data_set_factory(const cmd::parser_predict &cmd_parser) { +[[nodiscard]] inline data_set_variants data_set_factory(mpi::communicator comm, const cmd::parser_predict &cmd_parser) { switch (svm_type_from_model_file(cmd_parser.model_filename)) { case svm_type::csvc: if (cmd_parser.strings_as_labels) { - return make_classification_data_set(cmd_parser.input_filename); + return make_classification_data_set(std::move(comm), cmd_parser.input_filename); } else { - return make_classification_data_set::label_type>(cmd_parser.input_filename); + return make_classification_data_set::label_type>(std::move(comm), cmd_parser.input_filename); } case svm_type::csvr: - return make_regression_data_set::label_type>(cmd_parser.input_filename); + return make_regression_data_set::label_type>(std::move(comm), cmd_parser.input_filename); } // can never be reached ::plssvm::detail::unreachable(); @@ -115,15 +120,16 @@ template /** * @brief Return the correct data set based on the plssvm::detail::cmd::parser_scale command line options. * @details **Always** uses a classification data set since it allows more different label types. 
+ * @param[in] comm the MPI communicator wrapper * @param[in] cmd_parser the provided command line parser * @return the data set based on the provided command line parser (`[[nodiscard]]`) */ -[[nodiscard]] inline data_set_variants data_set_factory(const cmd::parser_scale &cmd_parser) { +[[nodiscard]] inline data_set_variants data_set_factory(mpi::communicator comm, const cmd::parser_scale &cmd_parser) { if (cmd_parser.strings_as_labels) { - return make_classification_data_set(cmd_parser.input_filename, make_scaling_factors(cmd_parser)); + return make_classification_data_set(comm, cmd_parser.input_filename, make_scaling_factors(comm, cmd_parser)); } else { using label_type = typename classification_data_set<>::label_type; - return make_classification_data_set(cmd_parser.input_filename, make_scaling_factors(cmd_parser)); + return make_classification_data_set(comm, cmd_parser.input_filename, make_scaling_factors(comm, cmd_parser)); } } diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp index 4ba2e1a65..5d930aa19 100644 --- a/include/plssvm/detail/cmd/parser_predict.hpp +++ b/include/plssvm/detail/cmd/parser_predict.hpp @@ -16,6 +16,7 @@ #include "plssvm/backend_types.hpp" // plssvm::backend_type #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "fmt/base.h" // fmt::formatter @@ -23,6 +24,7 @@ #include // forward declare std::ostream #include // std::string +#include // std::vector namespace plssvm::detail::cmd { @@ -33,10 +35,11 @@ struct parser_predict { /** * @brief Parse the command line arguments @p argv using [`cxxopts`](https://github.com/jarro2783/cxxopts) and set the predict parameters accordingly. 
* @details If no output filename is given, uses the input filename and appends a ".predict". The output file is than saved in the current working directory. + * @param[in] comm the MPI communicator wrapper * @param[in] argc the number of passed command line arguments * @param[in] argv the command line arguments */ - parser_predict(int argc, char **argv); + parser_predict(const mpi::communicator &comm, int argc, char **argv); /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, SYCL, or Kokkos. backend_type backend{ backend_type::automatic }; @@ -52,6 +55,10 @@ struct parser_predict { /// `true` if `std::string` should be used as label type instead of the default type `ìnt`. bool strings_as_labels{ false }; + /// Load balancing weights for MPI used if different hardware per MPI process is used. The number must match the number of spawned MPI processes. + /// Providing [1, 1] means every process gets the same amount of work, providing [1, 3] means that the second process has three times the work to do compared to process zero. + std::vector mpi_load_balancing_weights{}; + /// The name of the data file to predict. std::string input_filename{}; /// The name of the model file containing the support vectors and weights used for prediction. 
diff --git a/include/plssvm/detail/cmd/parser_scale.hpp b/include/plssvm/detail/cmd/parser_scale.hpp index 925bebb80..402b62f8f 100644 --- a/include/plssvm/detail/cmd/parser_scale.hpp +++ b/include/plssvm/detail/cmd/parser_scale.hpp @@ -15,6 +15,7 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/file_format_types.hpp" // plssvm::file_format_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // mt::ostream_formatter @@ -31,10 +32,11 @@ struct parser_scale { /** * @brief Parse the command line arguments @p argv using [`cxxopts`](https://github.com/jarro2783/cxxopts) and set the scale parameters accordingly. * @details If no scaled filename is given, the scaled data is directly output to the terminal (the default behavior of LIBSVM). + * @param[in] comm the MPI communicator wrapper * @param[in] argc the number of passed command line arguments * @param[in] argv the command line arguments */ - parser_scale(int argc, char **argv); + parser_scale(const mpi::communicator &comm, int argc, char **argv); /// The lower bound of the scaled data values. 
real_type lower{ -1.0 }; diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp index 9b41cbc31..6ddae10ac 100644 --- a/include/plssvm/detail/cmd/parser_train.hpp +++ b/include/plssvm/detail/cmd/parser_train.hpp @@ -19,6 +19,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type #include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/solver_types.hpp" // plssvm::solving_type #include "plssvm/svm_types.hpp" // plssvm::svm_type @@ -30,6 +31,7 @@ #include // std::size_t #include // forward declare std::ostream #include // std::string +#include // std::vector namespace plssvm::detail::cmd { @@ -40,10 +42,11 @@ struct parser_train { /** * @brief Parse the command line arguments @p argv using [`cxxopts`](https://github.com/jarro2783/cxxopts) and set the training parameters accordingly. * @details If no model filename is given, uses the input filename and appends a ".model". The model file is than saved in the current working directory. + * @param[in] comm the MPI communicator wrapper * @param[in] argc the number of passed command line arguments * @param[in] argv the command line arguments */ - parser_train(int argc, char **argv); + parser_train(const mpi::communicator &comm, int argc, char **argv); /// Other base C-SVM parameters plssvm::parameter csvm_params{}; @@ -76,6 +79,10 @@ struct parser_train { /// For the regression task, this parameter is ignored and `real_type` is always used. bool strings_as_labels{ false }; + /// Load balancing weights for MPI used if different hardware per MPI process is used. The number must match the number of spawned MPI processes. 
+ /// Providing [1, 1] means every process gets the same amount of work, providing [1, 3] means that the second process has three times the work to do compared to process zero. + std::vector mpi_load_balancing_weights{}; + /// The name of the data/test file to parse. std::string input_filename{}; /// The name of the model file to write the learned support vectors to/to parse the saved model from. diff --git a/include/plssvm/detail/data_distribution.hpp b/include/plssvm/detail/data_distribution.hpp index 659568647..af4043a79 100644 --- a/include/plssvm/detail/data_distribution.hpp +++ b/include/plssvm/detail/data_distribution.hpp @@ -14,13 +14,16 @@ #pragma once #include "plssvm/detail/memory_size.hpp" // plssvm:detail::memory_size +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // fmt::ostream_formatter -#include // std::size_t -#include // std::ostream forward declaration -#include // std::vector +#include // std::fill +#include // std::size_t +#include // std::ostream forward declaration +#include // std::accumulate +#include // std::vector namespace plssvm::detail { @@ -76,25 +79,91 @@ class data_distribution { */ [[nodiscard]] std::size_t num_rows() const noexcept; /** - * @brief The number of places (e.g., devices) to which the rows has been distributed. - * @return the number of places (`[[nodiscard]]`) + * @brief The number of places (e.g., devices) on the current MPI rank to which the rows have been distributed. + * @return the number of places on the current MPI rank (`[[nodiscard]]`) */ [[nodiscard]] std::size_t num_places() const noexcept; + /** + * @brief The total number of places (e.g., devices) across all MPI ranks to which the rows have been distributed.
+ * @return the total number of places across all MPI ranks (`[[nodiscard]]`) + */ + [[nodiscard]] std::size_t total_num_places() const noexcept; protected: /** * @brief Construct a new data distribution being able to hold the distribution for @p num_places. + * @param[in] comm the used MPI communicator * @param[in] num_rows the number of rows to distribute (used to initialize the values in the distribution) * @param[in] num_places the number of places to distribute the rows to */ - data_distribution(std::size_t num_rows, std::size_t num_places); + data_distribution(mpi::communicator comm, std::size_t num_rows, std::size_t num_places); + + /** + * @brief Distribute the previously provided number of rows on all MPI ranks and places given the load balancing weights + * using the distribution function @p distribute. + * @tparam DistributionFunction the type of the distribution function + * @param[in] distribute the distribution function + */ + template + void update_distribution(DistributionFunction distribute) { + // set all distribution values to "num_rows" + std::fill(distribution_.begin(), distribution_.end(), num_rows_); + + if (!distribution_.empty()) { // necessary to silence GCC "potential null pointer dereference [-Wnull-dereference]" warning + distribution_.front() = 0; + } + + // calculate the weight sum + const std::size_t weight_sum = std::accumulate(load_balancing_weights_.cbegin(), load_balancing_weights_.cend(), std::size_t{ 0 }); + + // calculate the distribution for the MPI ranks based on the provided weights + const std::vector weight_distribution = distribute(num_rows_, std::size_t{ 0 }, weight_sum); + + // calculate the MPI rank distribution based on the weight distribution + std::vector mpi_distribution(comm_.size() + 1, num_rows_); + for (std::size_t i = 0, idx = 0; i < comm_.size(); ++i) { + mpi_distribution[i] = weight_distribution[idx]; + idx += load_balancing_weights_[i]; + } + + // update the final distribution with the information we 
already know (we don't know the correct distribution for MPI ranks with more than one place) + for (std::size_t i = 0, idx = 0; i < comm_.size(); ++i) { + distribution_[idx] = mpi_distribution[i]; + idx += places_[i]; + } + + // now, if an MPI rank has more than one place, calculate the distribution based on the MPI rank's previous distribution + for (std::size_t i = 0, idx = 0; i < comm_.size(); ++i) { + if (places_[i] > 1) { + // calculate the required sub-distribution on the MPI rank i + const std::vector sub_distribution = distribute(mpi_distribution[i + 1] - mpi_distribution[i], num_rows_ - mpi_distribution[i + 1], places_[i]); + // update the final distribution accordingly + for (std::size_t j = 0; j < places_[i]; ++j) { + distribution_[idx + j + 1] = distribution_[idx + j] + (sub_distribution[j + 1] - sub_distribution[j]); + } + } + idx += places_[i]; + } + } - /// The specific data distribution across the requested number of places. - std::vector distribution_; /// The number of rows distributed. std::size_t num_rows_; - /// The number of places the rows should be distributed to. + /// The number of places on this MPI rank the rows should be distributed to. std::size_t num_places_; + /// The total number of places the rows should be distributed to. + std::size_t total_num_places_; + /// The number of places for each MPI rank. + std::vector places_; + + /// The used MPI communicator. + mpi::communicator comm_; + /// The number of places per MPI rank. + std::size_t rank_places_offset_; + /// The load balancing weights for each MPI rank. + std::vector load_balancing_weights_; + + /// The specific data distribution across the requested number of places. 
+ std::vector distribution_; }; /** @@ -118,7 +187,7 @@ class triangular_data_distribution : public data_distribution { * @brief Calculate the data distribution (i.e., the number of rows in the kernel matrix a *place* is responsible for) such that each *place* has * approximately the same number of data points it is responsible for accounting only for the upper triangular matrix. * @details Example: if we have 10 data points, the number of entries in the triangular matrix is equal to 10 * (10 + 1) / 2 = 55. - * If we want to distribute these 10 data points across 2 devices, each device would be responsible for the following rows/data points: + * If we want to distribute these 10 data points across 2 devices (or 2 MPI ranks), each device would be responsible for the following rows/data points: * - device 0: rows 0, 1, and 2 -> 10 + 9 + 8 = **27 matrix entries** * - device 1: rows 3, 4, 5, 6, 7, 8, 9 -> 7 + 6 + 5 + 4 + 3 + 2 + 1 = **28 matrix entries** * Therefore, each device is responsible for approximately the same number of **matrix entries** and **not** the same number of **rows**! @@ -136,10 +205,11 @@ class triangular_data_distribution : public data_distribution { * 8 \ | * 9 \_| ______ * + * @param[in] comm the used MPI communicator * @param[in] num_rows the number of data points to distribute * @param[in] num_places the number of places, i.e., different devices to distribute the data to */ - triangular_data_distribution(std::size_t num_rows, std::size_t num_places); + triangular_data_distribution(mpi::communicator comm, std::size_t num_rows, std::size_t num_places); /** * @brief Calculate the number of entries in the explicit kernel matrix for the current number of rows and @p place. 
@@ -187,11 +257,13 @@ class triangular_data_distribution : public data_distribution { class rectangular_data_distribution : public data_distribution { public: /** - * @brief Calculate the data distribution (i.e., the number of rows in the kernel matrix a *place* is responsible for) such that each *place* has approximately the same number of data points it is responsible for. + * @brief Calculate the data distribution (i.e., the number of rows in the kernel matrix a *place* is responsible for) such that each + * *place* has approximately the same number of data points it is responsible for. + * @param[in] comm the used MPI communicator * @param[in] num_rows the number of data points to distribute * @param[in] num_places the number of places, i.e., different devices to distribute the data to */ - rectangular_data_distribution(std::size_t num_rows, std::size_t num_places); + rectangular_data_distribution(mpi::communicator comm, std::size_t num_rows, std::size_t num_places); }; } // namespace plssvm::detail diff --git a/include/plssvm/detail/fast_float_wrapper.hpp b/include/plssvm/detail/fast_float_wrapper.hpp new file mode 100644 index 000000000..1c674d695 --- /dev/null +++ b/include/plssvm/detail/fast_float_wrapper.hpp @@ -0,0 +1,35 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Wrapper around fast_float to prevent UB when using fast-math (which is always disabled in the wrapper library). + */ + +#ifndef PLSSVM_DETAIL_FAST_FLOAT_WRAPPER_HPP_ +#define PLSSVM_DETAIL_FAST_FLOAT_WRAPPER_HPP_ +#pragma once + +#include // std::string_view +#include // std:errc +#include // std::pair + +namespace plssvm::detail { + +/** + * @brief Converts the string @p str to a floating point value of type @p T. 
+ * @details If @p T is a `long double` [`std::stold`](https://en.cppreference.com/w/cpp/string/basic_string/stof) is used since fast_float doesn't support long double, + * otherwise [`fast_float::from_chars`](https://github.com/fastfloat/fast_float) is used. + * @tparam T the type to convert the value of @p str to, must be a floating point type + * @param[in] str the string to convert + * @return the value of type @p T denoted by @p str and the potential error code if the @p str couldn't be converted to the type @p T (`[[nodiscard]]`) + */ +template +[[nodiscard]] std::pair convert_to_floating_point(std::string_view str); + +} // namespace plssvm::detail + +#endif // PLSSVM_DETAIL_FAST_FLOAT_WRAPPER_HPP_ diff --git a/include/plssvm/detail/io/classification_libsvm_model_parsing.hpp b/include/plssvm/detail/io/classification_libsvm_model_parsing.hpp index 60fabd47c..69b7ad676 100644 --- a/include/plssvm/detail/io/classification_libsvm_model_parsing.hpp +++ b/include/plssvm/detail/io/classification_libsvm_model_parsing.hpp @@ -20,13 +20,14 @@ #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader #include "plssvm/detail/io/libsvm_parsing.hpp" // plssvm::detail::io::parse_libsvm_num_features -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::memory_size, custom literals #include "plssvm/detail/string_conversion.hpp" // plssvm::detail::{convert_to, split_as} #include "plssvm/detail/string_utility.hpp" // plssvm::detail::{trim, trim_left, to_lower_case} #include "plssvm/gamma.hpp" // plssvm::get_gamma_string #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" //
plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -582,6 +583,7 @@ template * @endcode * @tparam label_type the type of the labels (any arithmetic type, except bool, or std::string) * @param[in,out] out the output-stream to write the header information to + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameters * @param[in] rho the rho values for the different classes resulting from the hyperplane learning * @param[in] data the data used to create the model @@ -589,7 +591,7 @@ template * @return the order of the different classes as it should appear in the following data section (`[[nodiscard]]`) */ template -[[nodiscard]] inline std::vector write_libsvm_model_header_classification(fmt::ostream &out, const plssvm::parameter ¶ms, const std::vector &rho, const classification_data_set &data) { +[[nodiscard]] inline std::vector write_libsvm_model_header_classification(fmt::ostream &out, const mpi::communicator &comm, const plssvm::parameter ¶ms, const std::vector &rho, const classification_data_set &data) { PLSSVM_ASSERT(data.has_labels(), "Cannot write a model file that does not include labels!"); PLSSVM_ASSERT(!rho.empty(), "At least one rho value must be provided!"); @@ -635,9 +637,10 @@ template fmt::join(rho, " ")); // print model header - detail::log(verbosity_level::full | verbosity_level::libsvm, - "\n{}\n", - out_string); + detail::log_untracked(verbosity_level::full | verbosity_level::libsvm, + comm, + "\n{}\n", + out_string); // write model header to file out.print("{}", out_string); @@ -667,6 +670,7 @@ template * @endcode * @tparam label_type the type of the labels (any arithmetic type, except bool, or std::string) * @param[in] filename the file to write the LIBSVM model to + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameters * @param[in] classification the used multi-class classification strategy * @param[in] rho the 
rho value resulting from the hyperplane learning @@ -676,7 +680,7 @@ template * @attention The PLSSVM model file is only compatible with LIBSVM for the one vs. one classification type. */ template -inline void write_libsvm_model_data_classification(const std::string &filename, const plssvm::parameter ¶ms, const classification_type classification, const std::vector &rho, const std::vector> &alpha, const std::vector> &index_sets, const classification_data_set &data) { +inline void write_libsvm_model_data_classification(const std::string &filename, const mpi::communicator &comm, const plssvm::parameter ¶ms, const classification_type classification, const std::vector &rho, const std::vector> &alpha, const std::vector> &index_sets, const classification_data_set &data) { PLSSVM_ASSERT(!filename.empty(), "The provided model filename must not be empty!"); PLSSVM_ASSERT(data.has_labels(), "Cannot write a model file that does not include labels!"); PLSSVM_ASSERT(rho.size() == calculate_number_of_classifiers(classification, data.num_classes()), @@ -727,7 +731,7 @@ inline void write_libsvm_model_data_classification(const std::string &filename, fmt::ostream out = fmt::output_file(filename); // write header information - const std::vector label_order = write_libsvm_model_header_classification(out, params, rho, data); + const std::vector label_order = write_libsvm_model_header_classification(out, comm, params, rho, data); // the maximum size of one formatted LIBSVM entry, e.g., 1234:1.365363e+10 // biggest number representable as std::size_t: 18446744073709551615 -> 20 chars diff --git a/include/plssvm/detail/io/file_reader.hpp b/include/plssvm/detail/io/file_reader.hpp index a6c318bfc..d89fc6152 100644 --- a/include/plssvm/detail/io/file_reader.hpp +++ b/include/plssvm/detail/io/file_reader.hpp @@ -160,6 +160,7 @@ class file_reader { [[nodiscard]] const char *buffer() const noexcept; private: +#if defined(PLSSVM_HAS_MEMORY_MAPPING_UNIX) /** * @brief Try to open the file @p 
filename and "read" its content using memory mapped IO on UNIX systems. * @details If the file could not be memory mapped, automatically falls back to open_file(). @@ -167,7 +168,9 @@ class file_reader { * @throws plssvm::file_not_found_exception if the @p filename couldn't be found */ void open_memory_mapped_file_unix(const char *filename); +#endif +#if defined(PLSSVM_HAS_MEMORY_MAPPING_WINDOWS) /** * @brief Try to open the file @p filename and "read" its content using memory mapped IO on Windows systems. * @details If the file could not be memory mapped, automatically falls back to open_file(). @@ -175,6 +178,7 @@ class file_reader { * @throws plssvm::file_not_found_exception if the @p filename couldn't be found */ void open_memory_mapped_file_windows(const char *filename); +#endif /** * @brief Read open the file and read its content in one buffer using a normal std::ifstream. diff --git a/include/plssvm/detail/io/regression_libsvm_model_parsing.hpp b/include/plssvm/detail/io/regression_libsvm_model_parsing.hpp index 514aeef7f..5cfbb9ae6 100644 --- a/include/plssvm/detail/io/regression_libsvm_model_parsing.hpp +++ b/include/plssvm/detail/io/regression_libsvm_model_parsing.hpp @@ -13,21 +13,22 @@ #define PLSSVM_DETAIL_IO_REGRESSION_LIBSVM_MODEL_PARSING_HPP_ #pragma once -#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE -#include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader -#include "plssvm/detail/io/libsvm_parsing.hpp" // plssvm::detail::io::parse_libsvm_num_features -#include "plssvm/detail/logging.hpp" // plssvm::detail::log -#include "plssvm/detail/memory_size.hpp" // plssvm::memory_size, custom literals -#include "plssvm/detail/string_conversion.hpp" // plssvm::detail::{convert_to, split_as} -#include "plssvm/detail/string_utility.hpp" // plssvm::detail::{trim, trim_left, 
to_lower_case} -#include "plssvm/gamma.hpp" // plssvm::get_gamma_string -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/matrix.hpp" // plssvm::soa_matrix -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/shape.hpp" // plssvm::shape -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level +#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE +#include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader +#include "plssvm/detail/io/libsvm_parsing.hpp" // plssvm::detail::io::parse_libsvm_num_features +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/memory_size.hpp" // plssvm::memory_size, custom literals +#include "plssvm/detail/string_conversion.hpp" // plssvm::detail::{convert_to, split_as} +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::{trim, trim_left, to_lower_case} +#include "plssvm/gamma.hpp" // plssvm::get_gamma_string +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/matrix.hpp" // plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "fmt/compile.h" // FMT_COMPILE #include "fmt/format.h" // fmt::format_to, fmt::format @@ -370,13 +371,14 @@ namespace plssvm::detail::io { * @endcode * @tparam label_type the type of the labels (any arithmetic type, except bool, or std::string) * @param[in,out] out the output-stream to write the header information to + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameters * @param[in] rho the rho value * @param[in] data 
the data used to create the model * @attention The PLSSVM model file is currently not compatible with LIBSVM due to other "svm_type" entries. */ template -inline void write_libsvm_model_header_regression(fmt::ostream &out, const plssvm::parameter ¶ms, const std::vector &rho, const regression_data_set &data) { +inline void write_libsvm_model_header_regression(fmt::ostream &out, const mpi::communicator &comm, const plssvm::parameter ¶ms, const std::vector &rho, const regression_data_set &data) { PLSSVM_ASSERT(rho.size() == 1, "Exactly one rho value must be provided!"); // save model file header @@ -403,9 +405,10 @@ inline void write_libsvm_model_header_regression(fmt::ostream &out, const plssvm fmt::join(rho, " ")); // print model header - detail::log(verbosity_level::full | verbosity_level::libsvm, - "\n{}\n", - out_string); + detail::log_untracked(verbosity_level::full | verbosity_level::libsvm, + comm, + "\n{}\n", + out_string); // write model header to file out.print("{}", out_string); } @@ -431,6 +434,7 @@ inline void write_libsvm_model_header_regression(fmt::ostream &out, const plssvm * @endcode * @tparam label_type the type of the labels (any arithmetic type, except bool, or std::string) * @param[in] filename the file to write the LIBSVM model to + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameters * @param[in] rho the rho value resulting from the hyperplane learning * @param[in] alpha the weights learned by the SVM @@ -438,7 +442,7 @@ inline void write_libsvm_model_header_regression(fmt::ostream &out, const plssvm * @attention The PLSSVM model file is only compatible with LIBSVM for the one vs. one classification type. 
*/ template -inline void write_libsvm_model_data_regression(const std::string &filename, const plssvm::parameter ¶ms, const std::vector &rho, const std::vector> &alpha, const regression_data_set &data) { +inline void write_libsvm_model_data_regression(const std::string &filename, const mpi::communicator &comm, const plssvm::parameter ¶ms, const std::vector &rho, const std::vector> &alpha, const regression_data_set &data) { PLSSVM_ASSERT(!filename.empty(), "The provided model filename must not be empty!"); PLSSVM_ASSERT(rho.size() == 1, "The number of rho values is {} but must be exactly 1!", @@ -458,7 +462,7 @@ inline void write_libsvm_model_data_regression(const std::string &filename, cons fmt::ostream out = fmt::output_file(filename); // write header information - write_libsvm_model_header_regression(out, params, rho, data); + write_libsvm_model_header_regression(out, comm, params, rho, data); // the maximum size of one formatted LIBSVM entry, e.g., 1234:1.365363e+10 // biggest number representable as std::size_t: 18446744073709551615 -> 20 chars diff --git a/include/plssvm/detail/logging.hpp b/include/plssvm/detail/logging/log.hpp similarity index 58% rename from include/plssvm/detail/logging.hpp rename to include/plssvm/detail/logging/log.hpp index 8cccb39b9..f612dee01 100644 --- a/include/plssvm/detail/logging.hpp +++ b/include/plssvm/detail/logging/log.hpp @@ -6,22 +6,18 @@ * @license This file is part of the PLSSVM project which is released under the MIT license. * See the LICENSE.md file in the project root for full license information. * - * @brief Defines a simple logging function. Additionally, depends on the `plssvm::detail::performance_tracker`. + * @brief Defines a simple logging function without MPI support. Additionally, depends on the `plssvm::detail::performance_tracker`. 
*/ -#ifndef PLSSVM_DETAIL_LOGGING_HPP_ -#define PLSSVM_DETAIL_LOGGING_HPP_ +#ifndef PLSSVM_DETAIL_LOGGING_LOG_HPP_ +#define PLSSVM_DETAIL_LOGGING_LOG_HPP_ #pragma once #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::is_tracking_entry_v, // PLSSVM_PERFORMANCE_TRACKER_ENABLED, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level -#include "fmt/chrono.h" // format std::chrono types -#include "fmt/color.h" // fmt::fg, fmt::color -#include "fmt/format.h" // fmt::format, fmt::runtime - -#include // std::cout, std::clog, std::flush #include // std::string_view #include // std::forward @@ -33,23 +29,14 @@ namespace plssvm::detail { * this is also added to the `plssvm::detail::performance_tracker`. * Only logs the message if the verbosity level matches the `plssvm::verbosity` level. 
* @tparam Args the types of the placeholder values - * @param[in] verb the verbosity level of the message to log; must match the `plssvm::verbosity` level to log the message + * @param[in] msg_verbosity the verbosity level of the message to log * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) * @param[in] args the values to fill the {fmt}-like placeholders in @p msg */ template -void log(const verbosity_level verb, const std::string_view msg, Args &&...args) { - // if the verbosity level is quiet, nothing is logged - // otherwise verb must contain the bit-flag currently set by plssvm::verbosity - if (verbosity != verbosity_level::quiet && (verb & verbosity) != verbosity_level::quiet) { - // if the plssvm::verbosity_level is the warning level, output the message on stderr - // otherwise output the message on stdout - if ((verb & verbosity_level::warning) != verbosity_level::quiet) { - std::clog << fmt::format(fmt::runtime(msg), args...) << std::flush; - } else { - std::cout << fmt::format(fmt::runtime(msg), args...) 
<< std::flush; - } - } +void log(const verbosity_level msg_verbosity, const std::string_view msg, Args &&...args) { + // first, log the message to the standard output without performance tracking + log_untracked(msg_verbosity, msg, args...); // if performance tracking has been enabled, add tracking entries #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) @@ -64,4 +51,4 @@ void log(const verbosity_level verb, const std::string_view msg, Args &&...args) } // namespace plssvm::detail -#endif // PLSSVM_DETAIL_LOGGING_HPP_ +#endif // PLSSVM_DETAIL_LOGGING_LOG_HPP_ diff --git a/include/plssvm/detail/logging/log_untracked.hpp b/include/plssvm/detail/logging/log_untracked.hpp new file mode 100644 index 000000000..4a8ef94af --- /dev/null +++ b/include/plssvm/detail/logging/log_untracked.hpp @@ -0,0 +1,57 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a simple logging function without MPI support. Wrapper that disables performance tracking due to circular dependencies. + */ + +#ifndef PLSSVM_DETAIL_LOGGING_LOG_UNTRACKED_HPP_ +#define PLSSVM_DETAIL_LOGGING_LOG_UNTRACKED_HPP_ +#pragma once + +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level + +#include "fmt/chrono.h" // format std::chrono types +#include "fmt/color.h" // fmt::fg, fmt::color +#include "fmt/format.h" // fmt::format, fmt::runtime + +#include // std::cout, std::clog, std::flush +#include // std::string_view +#include // std::forward + +namespace plssvm::detail { + +/** + * @brief Output the message @p msg filling the {fmt} like placeholders with @p args to the standard output stream. 
+ * @details Only logs the message if the verbosity level matches the `plssvm::verbosity` level. + * @tparam Args the types of the placeholder values + * @param[in] msg_verbosity the verbosity level of the message to log + * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) + * @param[in] args the values to fill the {fmt}-like placeholders in @p msg + */ +template +void log_untracked(const verbosity_level msg_verbosity, const std::string_view msg, Args &&...args) { + // verbosity = the currently active verbosity level + // msg_verbosity = the verbosity of the current message + + // if the global verbosity or the message verbosity is 'plssvm::verbosity_level::quiet', nothing should be logged + if (!(verbosity == verbosity_level::quiet || msg_verbosity == verbosity_level::quiet)) { + // check whether the provided msg_verbosity is contained in the current active verbosity + if ((verbosity & msg_verbosity) != verbosity_level::quiet || (verbosity == verbosity_level::full && (msg_verbosity & verbosity_level::libsvm) == verbosity_level::quiet)) { + // check if it is a warning message, if yes, the output will be colored + if ((msg_verbosity & verbosity_level::warning) != verbosity_level::quiet) { + std::clog << fmt::format(fmt::fg(fmt::color::orange), fmt::runtime(msg), std::forward(args)...) << std::flush; + } else { + std::cout << fmt::format(fmt::runtime(msg), std::forward(args)...) 
<< std::flush; + } + } + } +} + +} // namespace plssvm::detail + +#endif // PLSSVM_DETAIL_LOGGING_LOG_UNTRACKED_HPP_ diff --git a/include/plssvm/detail/logging/mpi_log.hpp b/include/plssvm/detail/logging/mpi_log.hpp new file mode 100644 index 000000000..b03ef3ef9 --- /dev/null +++ b/include/plssvm/detail/logging/mpi_log.hpp @@ -0,0 +1,49 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a simple logging function with MPI support. Additionally, depends on the `plssvm::detail::performance_tracker`. + */ + +#ifndef PLSSVM_DETAIL_LOGGING_MPI_LOG_HPP_ +#define PLSSVM_DETAIL_LOGGING_MPI_LOG_HPP_ +#pragma once + +#include "plssvm/detail/logging/log.hpp" // plssvm::detail::log +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level + +#include // std::string_view +#include // std::forward + +namespace plssvm::detail { + +/** + * @brief Output the message @p msg filling the {fmt} like placeholders with @p args to the standard output stream if @p comm represents the current main MPI rank. + * @details If a value in @p args is of type plssvm::detail::tracking_entry and performance tracking is enabled, + * this is also added to the `plssvm::detail::performance_tracker`. + * Only logs the message if the verbosity level matches the `plssvm::verbosity` level. 
+ * @tparam Args the types of the placeholder values + * @param[in] msg_verbosity the verbosity level of the message to log + * @param[in] comm the used MPI communicator + * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) + * @param[in] args the values to fill the {fmt}-like placeholders in @p msg + */ +template +void log(const verbosity_level msg_verbosity, const mpi::communicator &comm, const std::string_view msg, Args &&...args) { + if (comm.is_main_rank()) { + // only print on the main MPI rank + log(msg_verbosity, msg, std::forward(args)...); + } else { + // set output to quiet otherwise (since all MPI ranks should track their args) + log(verbosity_level::quiet, msg, std::forward(args)...); + } +} + +} // namespace plssvm::detail + +#endif // PLSSVM_DETAIL_LOGGING_MPI_LOG_HPP_ diff --git a/include/plssvm/detail/logging/mpi_log_untracked.hpp b/include/plssvm/detail/logging/mpi_log_untracked.hpp new file mode 100644 index 000000000..411edecff --- /dev/null +++ b/include/plssvm/detail/logging/mpi_log_untracked.hpp @@ -0,0 +1,45 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a simple logging function with MPI support. Wrapper that disables performance tracking due to circular dependencies. 
+ */ + +#ifndef PLSSVM_DETAIL_LOGGING_MPI_LOG_UNTRACKED_HPP_ +#define PLSSVM_DETAIL_LOGGING_MPI_LOG_UNTRACKED_HPP_ +#pragma once + +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level + +#include // std::string_view +#include // std::forward + +namespace plssvm::detail { + +/** + * @brief Output the message @p msg filling the {fmt} like placeholders with @p args to the standard output stream if @p comm represents the current main MPI rank. + * @details Only logs the message if the verbosity level matches the `plssvm::verbosity` level. + * @tparam Args the types of the placeholder values + * @param[in] msg_verbosity the verbosity level of the message to log + * @param[in] comm the used MPI communicator + * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) + * @param[in] args the values to fill the {fmt}-like placeholders in @p msg + */ +template +void log_untracked(const verbosity_level msg_verbosity, const mpi::communicator &comm, const std::string_view msg, Args &&...args) { + if (comm.is_main_rank()) { + // only print on the main MPI rank + log_untracked(msg_verbosity, msg, std::forward(args)...); + } + // nothing to do on other MPI ranks since nothing must be tracked +} + +} // namespace plssvm::detail + +#endif // PLSSVM_DETAIL_LOGGING_MPI_LOG_UNTRACKED_HPP_ diff --git a/include/plssvm/detail/logging_without_performance_tracking.hpp b/include/plssvm/detail/logging_without_performance_tracking.hpp deleted file mode 100644 index a92729a66..000000000 --- a/include/plssvm/detail/logging_without_performance_tracking.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/** - * @file - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today 
The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Defines a simple logging function. Wrapper that disables performance tracking due to circular dependencies. - */ - -#ifndef PLSSVM_DETAIL_LOGGING_WITHOUT_PERFORMANCE_TRACKING_HPP_ -#define PLSSVM_DETAIL_LOGGING_WITHOUT_PERFORMANCE_TRACKING_HPP_ -#pragma once - -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level - -#include "fmt/chrono.h" // format std::chrono types -#include "fmt/color.h" // fmt::fg, fmt::color -#include "fmt/format.h" // fmt::format, fmt::runtime - -#include // std::cout, std::clog, std::flush -#include // std::string_view -#include // std::forward - -namespace plssvm::detail { - -/** - * @brief Output the message @p msg filling the {fmt} like placeholders with @p args to the standard output stream. - * @details Only logs the message if the verbosity level matches the `plssvm::verbosity` level. 
- * @tparam Args the types of the placeholder values - * @param[in] verb the verbosity level of the message to log; must match the `plssvm::verbosity` level to log the message - * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) - * @param[in] args the values to fill the {fmt}-like placeholders in @p msg - */ -template -void log_untracked(const verbosity_level verb, const std::string_view msg, Args &&...args) { - // if the verbosity level is quiet, nothing is logged - // otherwise verb must contain the bit-flag set by plssvm::verbosity - if (verbosity != verbosity_level::quiet && (verb & verbosity) != verbosity_level::quiet) { - if ((verb & verbosity_level::warning) != verbosity_level::quiet) { - std::clog << fmt::format(fmt::runtime(msg), std::forward(args)...) << std::flush; - } else { - std::cout << fmt::format(fmt::runtime(msg), std::forward(args)...) << std::flush; - } - } -} - -} // namespace plssvm::detail - -#endif // PLSSVM_DETAIL_LOGGING_WITHOUT_PERFORMANCE_TRACKING_HPP_ diff --git a/include/plssvm/detail/string_conversion.hpp b/include/plssvm/detail/string_conversion.hpp index b71170581..38fa078c5 100644 --- a/include/plssvm/detail/string_conversion.hpp +++ b/include/plssvm/detail/string_conversion.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/detail/arithmetic_type_name.hpp" // plssvm::detail::arithmetic_type_name +#include "plssvm/detail/fast_float_wrapper.hpp" // plssvm::detail::convert_to_floating_point #include "plssvm/detail/string_utility.hpp" // plssvm::detail::{trim, trim_left, as_lower_case} #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::remove_cvref_t #include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable @@ -31,17 +32,6 @@ namespace plssvm::detail { -/** - * @brief Converts the string @p str to a floating point value of type @p T. 
- * @details If @p T is a `long double` [`std::stold`](https://en.cppreference.com/w/cpp/string/basic_string/stof) is used since fast_float doesn't support long double, - * otherwise [`float_fast::from_chars`](https://github.com/fastfloat/fast_float) is used. - * @tparam T the type to convert the value of @p str to, must be a floating point type - * @param[in] str the string to convert - * @return the value of type @p T denoted by @p str and the potential error code if the @p str couldn't be converted to the type @p T (`[[nodiscard]]`) - */ -template -[[nodiscard]] std::pair convert_to_floating_point(std::string_view str); - /** * @brief Converts the string @p str to a value of type @p T. * @details If @p T is a `std::string` a trimmed version of the string is returned, diff --git a/include/plssvm/detail/tracking/performance_tracker.hpp b/include/plssvm/detail/tracking/performance_tracker.hpp index 06f2a292d..ce2394dbd 100644 --- a/include/plssvm/detail/tracking/performance_tracker.hpp +++ b/include/plssvm/detail/tracking/performance_tracker.hpp @@ -21,6 +21,7 @@ #include "plssvm/detail/tracking/events.hpp" // plssvm::detail::tracking::{events, event} #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/detail/utility.hpp" // PLSSVM_EXTERN +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -188,6 +189,13 @@ class performance_tracker { * @param[in] entry the entry to add */ void add_tracking_entry(const tracking_entry &entry); + /** + * @brief Add a tracking_entry encapsulating a `plssvm::mpi::communicator` to this performance tracker. + * @details Saves a string containing the entry name and value in a map with the entry category as key. + * Adds all values related to MPI (if available). 
+ * @param[in] entry the entry to add + */ + void add_tracking_entry(const tracking_entry &entry); /** * @brief Add a tracking_entry encapsulating a `plssvm::detail::cmd::parser_train` to this performance tracker. * @details Saves a string containing the entry name and value in a map with the entry category as key. diff --git a/include/plssvm/detail/type_list.hpp b/include/plssvm/detail/type_list.hpp index 4b9b4d31b..38db27f49 100644 --- a/include/plssvm/detail/type_list.hpp +++ b/include/plssvm/detail/type_list.hpp @@ -29,7 +29,7 @@ using supported_label_types_classification = std::tuple; /// A type list of all supported label types (currently arithmetic types and `std::string`) as `std::tuple`. -using supported_label_types_regression = std::tuple; +using supported_label_types_regression = std::tuple; /// A type list of a reduced number of supported label types as `std::tuple`. using supported_label_types_regression_reduced = std::tuple; diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp index cddb3f31c..3dec0a5c6 100644 --- a/include/plssvm/environment.hpp +++ b/include/plssvm/environment.hpp @@ -2,6 +2,7 @@ * @file * @author Alexander Van Craen * @author Marcel Breyer + * @author Alexander Strack * @copyright 2018-today The PLSSVM project - All Rights Reserved * @license This file is part of the PLSSVM project which is released under the MIT license. * See the LICENSE.md file in the project root for full license information. 
@@ -13,17 +14,19 @@ #ifndef PLSSVM_ENVIRONMENT_HPP_ #define PLSSVM_ENVIRONMENT_HPP_ +#pragma once #include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::list_available_backends #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case #include "plssvm/detail/utility.hpp" // plssvm::detail::{contains, unreachable} #include "plssvm/exceptions/exceptions.hpp" // plssvm::environment_exception +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::{is_initialized, init} #if defined(PLSSVM_HAS_HPX_BACKEND) - #include // ::hpx::post - #include // ::hpx::{start, stop, finalize} - #include // ::hpx::{is_running, is_stopped} + #include "hpx/execution.hpp" // ::hpx::post + #include "hpx/hpx_start.hpp" // ::hpx::{start, stop, finalize} + #include "hpx/runtime.hpp" // ::hpx::{is_running, is_stopped} #endif #if defined(PLSSVM_HAS_KOKKOS_BACKEND) #include "Kokkos_Core.hpp" // Kokkos::is_initialized, Kokkos::is_finalized, Kokkos::initialize, Kokkos::finalize @@ -87,7 +90,7 @@ inline std::ostream &operator<<(std::ostream &out, const status s) { inline std::istream &operator>>(std::istream &in, status &s) { std::string str; in >> str; - detail::to_lower_case(str); + ::plssvm::detail::to_lower_case(str); if (str == "uninitialized") { s = status::uninitialized; @@ -274,6 +277,11 @@ inline void finalize_backend([[maybe_unused]] const backend_type backend) { */ template inline void initialize_impl(const std::vector &backends, Args &...args) { + // if necessary, initialize MPI + if (!mpi::is_initialized()) { + mpi::init(args...); + } + // check if the provided backends are currently available const std::vector available_backends = list_available_backends(); for (const backend_type backend : backends) { @@ -400,6 +408,11 @@ inline std::vector initialize(int &argc, char **argv) { * @throws plssvm::environment_exception if one of the provided @p backends has already been finalized */ inline void 
finalize(const std::vector &backends) { + // if necessary, finalize MPI + if (!mpi::is_finalized()) { + mpi::finalize(); + } + // check if the provided backends are currently available const std::vector available_backends = list_available_backends(); for (const backend_type backend : backends) { diff --git a/include/plssvm/exceptions/exceptions.hpp b/include/plssvm/exceptions/exceptions.hpp index eac9254b4..7ee804b65 100644 --- a/include/plssvm/exceptions/exceptions.hpp +++ b/include/plssvm/exceptions/exceptions.hpp @@ -56,6 +56,30 @@ class exception : public std::runtime_error { source_location loc_; }; +/** + * @brief Exception type thrown for early exit in the cmd parser constructor. + * @details Used for a graceful tear down. + */ +class cmd_parser_exit : public exception { + public: + /** + * @brief Construct a new exception forwarding the exit code and source location to `plssvm::exception`. + * @param[in] exit_code the exit code + * @param[in] loc the exception's call side information + */ + explicit cmd_parser_exit(int exit_code, source_location loc = source_location::current()); + + /** + * @brief Return the previously defined exit code. + * @return the exit code (`[[nodiscard]]`) + */ + [[nodiscard]] int exit_code() const noexcept { return exit_code_; } + + private: + /// The exit code. + int exit_code_{}; +}; + /** * @brief Exception type thrown if the provided parameter is invalid. */ @@ -251,6 +275,19 @@ class environment_exception : public exception { explicit environment_exception(const std::string &msg, source_location loc = source_location::current()); }; +/** + * @brief Exception type thrown if something regarding our MPI wrapper went wrong. + */ +class mpi_exception : public exception { + public: + /** + * @brief Construct a new exception forwarding the exception message and source location to `plssvm::exception`. 
+ * @param[in] msg the exception's `what()` message + * @param[in] loc the exception's call side information + */ + explicit mpi_exception(const std::string &msg, source_location loc = source_location::current()); +}; + } // namespace plssvm #endif // PLSSVM_EXCEPTIONS_EXCEPTIONS_HPP_ diff --git a/include/plssvm/exceptions/source_location.hpp b/include/plssvm/exceptions/source_location.hpp index f27833e51..a75cea355 100644 --- a/include/plssvm/exceptions/source_location.hpp +++ b/include/plssvm/exceptions/source_location.hpp @@ -14,6 +14,7 @@ #pragma once #include // std::uint_least32_t +#include // std::optional, std::nullopt, std::make_optional #include // std::string_view namespace plssvm { @@ -32,46 +33,44 @@ class source_location { * @param[in] column the column number, always `0` * @return the source location object holding the information about the current call side (`[[nodiscard]]`) */ - [[nodiscard]] static constexpr source_location current( + [[nodiscard]] static source_location current( const char *file_name = __builtin_FILE(), const char *function_name = __builtin_FUNCTION(), int line = __builtin_LINE(), - int column = 0) noexcept { - source_location loc; - - loc.file_name_ = file_name; - loc.function_name_ = function_name; - loc.line_ = static_cast(line); - loc.column_ = static_cast(column); - - return loc; - } + int column = 0) noexcept; /** * @brief Returns the absolute path name of the file or `"unknown"` if no information could be retrieved. * @return the file name (`[[nodiscard]]`) */ - [[nodiscard]] constexpr std::string_view function_name() const noexcept { return function_name_; } + [[nodiscard]] std::string_view function_name() const noexcept { return function_name_; } /** * @brief Returns the function name without additional signature information (i.e. return type and parameters) * or `"unknown"` if no information could be retrieved. 
* @return the function name (`[[nodiscard]]`) */ - [[nodiscard]] constexpr std::string_view file_name() const noexcept { return file_name_; } + [[nodiscard]] std::string_view file_name() const noexcept { return file_name_; } /** * @brief Returns the line number or `0` if no information could be retrieved. * @return the line number (`[[nodiscard]]`) */ - [[nodiscard]] constexpr std::uint_least32_t line() const noexcept { return line_; } + [[nodiscard]] std::uint_least32_t line() const noexcept { return line_; } /** * @brief Returns the column number. * @attention Always `0`! * @return `0` (`[[nodiscard]]`) */ - [[nodiscard]] constexpr std::uint_least32_t column() const noexcept { return column_; } + [[nodiscard]] std::uint_least32_t column() const noexcept { return column_; } + + /** + * @brief Returns the current MPI rank. + * @attention Only available in an active MPI environment. + * @return the current MPI rank, or `std::nullopt` if not available (`[[nodiscard]]`) + */ + [[nodiscard]] std::optional world_rank() const noexcept { return world_rank_; } private: /// The line number as retrieved by `__builtin_LINE()`. @@ -82,6 +81,8 @@ class source_location { const char *file_name_{ "unknown" }; /// The function name as retrieved by `__builtin_FUNCTION()`. const char *function_name_{ "unknown" }; + /// The current MPI rank **with respect to** MPI_COMM_WORLD, if an MPI environment is active! 
+ std::optional world_rank_{ std::nullopt }; }; } // namespace plssvm diff --git a/include/plssvm/matrix.hpp b/include/plssvm/matrix.hpp index 82f9884b7..95513f0f5 100644 --- a/include/plssvm/matrix.hpp +++ b/include/plssvm/matrix.hpp @@ -13,12 +13,12 @@ #define PLSSVM_DETAIL_MATRIX_HPP_ #pragma once -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked -#include "plssvm/detail/utility.hpp" // plssvm::detail::{always_false_v, unreachable} -#include "plssvm/exceptions/exceptions.hpp" // plssvm::matrix_exception -#include "plssvm/shape.hpp" // plssvm::shape -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/utility.hpp" // plssvm::detail::{always_false_v, unreachable} +#include "plssvm/exceptions/exceptions.hpp" // plssvm::matrix_exception +#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "fmt/base.h" // fmt::formatter #include "fmt/color.h" // fmt::fg, fmt::color::orange diff --git a/include/plssvm/model/classification_model.hpp b/include/plssvm/model/classification_model.hpp index 1b75422d1..c63081a39 100644 --- a/include/plssvm/model/classification_model.hpp +++ b/include/plssvm/model/classification_model.hpp @@ -20,11 +20,12 @@ #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/io/classification_libsvm_model_parsing.hpp" // plssvm::detail::io::{parse_libsvm_model_header_classification, parse_libsvm_model_data_classification, write_libsvm_model_data_classification} #include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log #include 
"plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_list.hpp" // plssvm::detail::{supported_label_types, tuple_contains_v} #include "plssvm/matrix.hpp" // plssvm::soa_matrix, plssvm::aos_matrix #include "plssvm/model/model.hpp" // plssvm::model +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -88,6 +89,14 @@ class classification_model : public model { */ explicit classification_model(const std::string &filename); + /** + * @brief Read a previously learned model from the LIBSVM model file @p filename. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the model file to read + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::detail::io::parse_libsvm_model_header and plssvm::detail::io::parse_libsvm_data + */ + classification_model(mpi::communicator comm, const std::string &filename); + /** * @brief Save the model to a LIBSVM model file for later usage. 
* @param[in] filename the file to save the model to @@ -143,7 +152,12 @@ classification_model::classification_model(parameter params, classification_d classification_strategy_{ classification_strategy } { } template -classification_model::classification_model(const std::string &filename) { +classification_model::classification_model(const std::string &filename) : + classification_model{ mpi::communicator{}, filename } { } + +template +classification_model::classification_model(mpi::communicator comm, const std::string &filename) : + base_model{ std::move(comm) } { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); // open the file @@ -193,6 +207,7 @@ classification_model::classification_model(const std::string &filename) { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Read {} support vectors with {} features and {} classes using {} classification in {} using the libsvm classification model parser from file '{}'.\n\n", detail::tracking::tracking_entry{ "model_read", "num_support_vectors", this->num_support_vectors() }, detail::tracking::tracking_entry{ "model_read", "num_features", this->num_features() }, @@ -208,11 +223,14 @@ template void classification_model::save(const std::string &filename) const { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); - // save model file header and support vectors - detail::io::write_libsvm_model_data_classification(filename, this->get_params(), this->get_classification_type(), this->rho(), this->weights(), *index_sets_ptr_, dynamic_cast &>(*data_)); + if (this->communicator().is_main_rank()) { + // save model file header and support vectors + detail::io::write_libsvm_model_data_classification(filename, this->communicator(), this->get_params(), this->get_classification_type(), this->rho(), this->weights(), *index_sets_ptr_, dynamic_cast &>(*data_)); + } const 
std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Write {} support vectors with {} features and {} classes using {} classification in {} to the libsvm classification model file '{}'.\n", detail::tracking::tracking_entry{ "model_write", "num_support_vectors", this->num_support_vectors() }, detail::tracking::tracking_entry{ "model_write", "num_features", this->num_features() }, diff --git a/include/plssvm/model/model.hpp b/include/plssvm/model/model.hpp index 71c70e488..af35982dc 100644 --- a/include/plssvm/model/model.hpp +++ b/include/plssvm/model/model.hpp @@ -17,6 +17,7 @@ #include "plssvm/data_set/data_set.hpp" // plssvm::data_set, plssvm::optional_ref #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/matrix.hpp" // plssvm::soa_matrix, plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include // std::size_t @@ -40,6 +41,12 @@ class model { /// The unsigned size type. using size_type = std::size_t; + /** + * @brief Create a model with the provided MPI communicator. + * @param[in] comm the used MPI communicator (**note**: current only used to restrict logging outputs to the main MPI rank) + */ + explicit model(mpi::communicator comm); + /** * @brief Default copy constructor. */ @@ -67,6 +74,7 @@ class model { /** * @brief Save the model to a LIBSVM model file for later usage. * @param[in] filename the file to save the model to + * @note Only the main MPI rank (traditionally rank 0) saves the whole data set (if MPI is available). */ virtual void save(const std::string &filename) const = 0; @@ -128,6 +136,14 @@ class model { */ [[nodiscard]] const std::optional> &num_iters() const noexcept { return num_iters_; } + /** + * @brief Get the associated MPI communicator. 
+ * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const mpi::communicator &communicator() const noexcept { + return comm_; + } + protected: /** * @brief Default construct an empty model. @@ -151,6 +167,9 @@ class model { /// The number of iterations needed to fit this model. std::optional> num_iters_{}; + /// The used MPI communicator. + mpi::communicator comm_{}; + /** * @brief The learned weights for each support vector. * @details For one vs. all the vector contains a single matrix representing all weights. @@ -174,12 +193,17 @@ class model { std::shared_ptr> w_ptr_{ std::make_shared>() }; }; +template +model::model(mpi::communicator comm) : + comm_{ std::move(comm) } { } + template model::model(parameter params, std::shared_ptr> data) : params_{ std::move(params) }, data_{ std::move(data) }, num_support_vectors_{ data_->num_data_points() }, - num_features_{ data_->num_features() } { } + num_features_{ data_->num_features() }, + comm_{ data_->communicator() } { } } // namespace plssvm diff --git a/include/plssvm/model/regression_model.hpp b/include/plssvm/model/regression_model.hpp index 017e53b39..77bb78aee 100644 --- a/include/plssvm/model/regression_model.hpp +++ b/include/plssvm/model/regression_model.hpp @@ -18,11 +18,12 @@ #include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set #include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader #include "plssvm/detail/io/regression_libsvm_model_parsing.hpp" // plssvm::detail::io::{parse_libsvm_model_header_regression, parse_libsvm_model_data_regression, write_libsvm_model_data_regression} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_list.hpp" // 
plssvm::detail::{supported_label_types, tuple_contains_v} #include "plssvm/matrix.hpp" // plssvm::soa_matrix, plssvm::aos_matrix #include "plssvm/model/model.hpp" // plssvm::model +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -83,6 +84,14 @@ class regression_model : public model { */ explicit regression_model(const std::string &filename); + /** + * @brief Read a previously learned model from the LIBSVM model file @p filename. + * @param[in] comm the used MPI communicator (**note**: currently only used to restrict logging outputs to the main MPI rank) + * @param[in] filename the model file to read + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::detail::io::parse_libsvm_model_header and plssvm::detail::io::parse_libsvm_data + */ + regression_model(mpi::communicator comm, const std::string &filename); + /** * @brief Save the model to a LIBSVM model file for later usage. * @param[in] filename the file to save the model to @@ -105,7 +114,12 @@ regression_model::regression_model(parameter params, regression_data_set>(std::move(data)) } { } template -regression_model::regression_model(const std::string &filename) { +regression_model::regression_model(const std::string &filename) : + regression_model{ mpi::communicator{}, filename } { } + +template +regression_model::regression_model(mpi::communicator comm, const std::string &filename) : + base_model{ std::move(comm) } { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); // open the file @@ -127,12 +141,12 @@ regression_model::regression_model(const std::string &filename) { // create data set const verbosity_level old_verbosity = verbosity; verbosity = verbosity_level::quiet; - // TODO: check whether whether labels can and should be omitted for the regression task! 
data_ = std::make_shared>(std::move(support_vectors)); verbosity = old_verbosity; const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Read {} support vectors with {} features in {} using the libsvm regression model parser from file '{}'.\n\n", detail::tracking::tracking_entry{ "model_read", "num_support_vectors", this->num_support_vectors() }, detail::tracking::tracking_entry{ "model_read", "num_features", this->num_features() }, @@ -145,11 +159,14 @@ template void regression_model::save(const std::string &filename) const { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); - // save model file header and support vectors - detail::io::write_libsvm_model_data_regression(filename, this->get_params(), this->rho(), this->weights(), dynamic_cast &>(*data_)); + if (this->communicator().is_main_rank()) { + // save model file header and support vectors + detail::io::write_libsvm_model_data_regression(filename, this->communicator(), this->get_params(), this->rho(), this->weights(), dynamic_cast &>(*data_)); + } const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + this->communicator(), "Write {} support vectors with {} features in {} to the libsvm regression model file '{}'.\n", detail::tracking::tracking_entry{ "model_write", "num_support_vectors", this->num_support_vectors() }, detail::tracking::tracking_entry{ "model_write", "num_features", this->num_features() }, diff --git a/include/plssvm/mpi/communicator.hpp b/include/plssvm/mpi/communicator.hpp new file mode 100644 index 000000000..886d13427 --- /dev/null +++ b/include/plssvm/mpi/communicator.hpp @@ -0,0 +1,244 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM 
project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a wrapper class around MPI's communicators. + * @details This wrapper class is only conditionally compiled such that MPI is still an **optional** dependency in PLSSVM. + */ + +#ifndef PLSSVM_MPI_COMMUNICATOR_HPP_ +#define PLSSVM_MPI_COMMUNICATOR_HPP_ +#pragma once + +#include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED +#include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type +#include "plssvm/mpi/detail/mpi_datatype.hpp" // plssvm::mpi::detail::mpi_datatype +#include "plssvm/mpi/detail/utility.hpp" // PLSSVM_MPI_ERROR_CHECK + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_Comm, MPI_COMM_WORLD, MPI_Gather, MPI_Allreduce, MPI_Exscan, MPI_IN_PLACE, MPI_SUM +#endif + +#include // std::chrono::milliseconds +#include // std::size_t +#include // std::invoke +#include // std::optional +#include // std::string +#include // std::vector + +namespace plssvm::mpi { + +/** + * @brief A small wrapper around MPI functions used in PLSSVM. + * @details If PLSSVM was built without MPI support, this wrapper defines the respective functions to be essentially no-ops. + */ +class communicator { + public: + /** + * @brief Default construct an MPI communicator wrapper using `MPI_COMM_WORLD`. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, does nothing. + */ + communicator() = default; + + /** + * @brief Default construct an MPI communicator wrapper using `MPI_COMM_WORLD` and set the load balancing @p weights. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, only stores the load balancing weights. 
+ * @param[in] weights the load balancing weights + * @throws plssvm::mpi_exception if the number of @p weights does not match the MPI communicator size + */ + explicit communicator(std::vector weights); + +#if defined(PLSSVM_HAS_MPI_ENABLED) + /** + * @brief Construct an MPI communicator wrapper using the provided MPI communicator. + * @param[in] comm the provided MPI communicator + * @note This function does not take ownership of the provided MPI communicator! + */ + explicit communicator(MPI_Comm comm); + + /** + * @brief Construct an MPI communicator wrapper using the provided MPI communicator and set the load balancing @p weights. + * @param[in] comm the provided MPI communicator + * @param[in] weights the load balancing weights + * @throws plssvm::mpi_exception if the number of @p weights does not match the MPI communicator size + * @note This function does not take ownership of the provided MPI communicator! + */ + communicator(MPI_Comm comm, std::vector weights); +#endif + + /** + * @brief Return the total number of MPI ranks in this communicator. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns `1`. + * @return the number of MPI ranks in this communicator (`[[nodiscard]]`) + */ + [[nodiscard]] std::size_t size() const; + /** + * @brief Return the current MPI rank with respect to this communicator. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns `0`. + * @return the current MPI rank with respect to this communicator (`[[nodiscard]]`) + */ + [[nodiscard]] std::size_t rank() const; + + /** + * @brief Return the MPI rank that is identified as main MPI rank. + * @details For PLSSVM, the main MPI rank is rank `0` in the current communicator. + * @return the main MPI rank `0` (`[[nodiscard]]`) + */ + [[nodiscard]] constexpr static std::size_t main_rank() { return 0; } + + /** + * @brief Check whether distributed execution via MPI is enabled. 
+ * @return `true` if MPI is enabled, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] constexpr static bool is_mpi_enabled() { return PLSSVM_IS_DEFINED(PLSSVM_HAS_MPI_ENABLED); } + + /** + * @brief Returns `true` if the current MPI rank is rank `0`, i.e., the main MPI rank. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns `true`. + * @return `true` if the current MPI rank is `0`, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] bool is_main_rank() const; + /** + * @brief Waits for all MPI ranks in this communicator to finish. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, does nothing. + */ + void barrier() const; + + /** + * @brief Execute the provided function @p f in a sequential manner across all MPI ranks in the current MPI communicator. + * @details The order is determined by the MPI ranks' values. + * @tparam Func the type of the function + * @param[in] f the function to execute + */ + template + void serialize(Func f) const { + // iterate over all potential MPI ranks in the current communicator + for (std::size_t rank = 0; rank < this->size(); ++rank) { + // call function only if MY rank matches the current iteration + if (rank == this->rank()) { + std::invoke(f); + } + // wait for the current MPI rank to finish + this->barrier(); + } + } + + /** + * @brief Gather the @p value from each MPI rank on the `communicator::main_rank()`. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns the provided @p value wrapped in a `std::vector`. 
+ * @tparam T the type of the values to gather + * @param[in] value the value to gather at the main MPI rank + * @return a `std::vector` containing all gathered values (`[[nodiscard]]`) + */ + template + [[nodiscard]] std::vector gather(T value) const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + std::vector result(this->size()); + PLSSVM_MPI_ERROR_CHECK(MPI_Gather(&value, 1, detail::mpi_datatype(), result.data(), 1, detail::mpi_datatype(), communicator::main_rank(), comm_)); + return result; +#else + return { value }; +#endif + } + + /** + * @brief Gather the `std::string` @p str from each MPI rank on the `communicator::main_rank()`. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns the provided @p str wrapped in a `std::vector`. + * @param[in] str the string to gather at the main MPI rank + * @return a `std::vector` containing all gathered strings (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector gather(const std::string &str) const; + + /** + * @brief Gather the `std::chrono::milliseconds` @p duration from each MPI rank on the `communicator::main_rank()`. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns the provided @p duration wrapped in a `std::vector`. + * @param[in] duration the duration to gather at the main MPI rank + * @return a `std::vector` containing all gathered durations (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector gather(const std::chrono::milliseconds &duration) const; + + /** + * @brief Gather the @p value from each MPI rank and distribute the result to all MPI ranks. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns the provided @p value wrapped in a `std::vector`. 
+ * @tparam T the type of the values to gather + * @param[in] value the value to gather on all MPI ranks + * @return a `std::vector` containing all gathered values (`[[nodiscard]]`) + */ + template + [[nodiscard]] std::vector allgather(T value) const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + std::vector result(this->size()); + PLSSVM_MPI_ERROR_CHECK(MPI_Allgather(&value, 1, detail::mpi_datatype(), result.data(), 1, detail::mpi_datatype(), comm_)); + return result; +#else + return { value }; +#endif + } + + /** + * @brief Reduce the @p matr on all MPI ranks by summing all elements elementwise. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, does not mutate `matr`. + * @tparam T the value type of the matrix + * @tparam layout the matrix layout + * @param[in,out] matr the matrix to reduce, changed inplace + */ + template + void allreduce_inplace([[maybe_unused]] plssvm::matrix &matr) const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + PLSSVM_MPI_ERROR_CHECK(MPI_Allreduce(MPI_IN_PLACE, matr.data(), static_cast(matr.size_padded()), detail::mpi_datatype(), MPI_SUM, comm_)); +#endif + } + +#if defined(PLSSVM_HAS_MPI_ENABLED) + /** + * @brief Add implicit conversion operator back to a native MPI communicator. + * @return The wrapped MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] operator MPI_Comm() const { return comm_; } +#endif + + /** + * @brief Update the load balancing weights. + * @param[in] weights the new weights + * @throws plssvm::mpi_exception if the number of @p weights does not match the MPI communicator size + */ + void set_load_balancing_weights(std::vector weights); + + /** + * @brief Return the current load balancing weights if any. + * @details If assertions are enabled and there are load balancing weights, always checks whether the load balancing weights are the same for **all** MPI ranks. 
+ * @return the weights (`[[nodiscard]]`) + */ + [[nodiscard]] const std::optional> &get_load_balancing_weights() const noexcept; + + /** + * @brief Check whether @p lhs and @p rhs are equal, i.e., they are identical to each other, otherwise no collective operations are supported. + * @param[in] lhs the first MPI communicator + * @param[in] rhs the second MPI communicator + * @return `true` if both communicators are identical, otherwise `false` + */ + friend bool operator==(const communicator &lhs, const communicator &rhs) noexcept; + /** + * @brief Check whether @p lhs and @p rhs are unequal, i.e., they are **not** identical to each other. + * @param[in] lhs the first MPI communicator + * @param[in] rhs the second MPI communicator + * @return `true` if both communicators are **not** identical, otherwise `false` + */ + friend bool operator!=(const communicator &lhs, const communicator &rhs) noexcept; + + private: +#if defined(PLSSVM_HAS_MPI_ENABLED) + /// The wrapped MPI communicator. Only available if `PLSSVM_HAS_MPI_ENABLED` is defined! + MPI_Comm comm_{ MPI_COMM_WORLD }; +#endif + /// The MPI load balancing weights. Always guaranteed to be the same size as the communicator size. + std::optional> load_balancing_weights_{ std::nullopt }; +}; + +} // namespace plssvm::mpi + +#endif // PLSSVM_MPI_COMMUNICATOR_HPP_ diff --git a/include/plssvm/mpi/detail/information.hpp b/include/plssvm/mpi/detail/information.hpp new file mode 100644 index 000000000..6c61ca2ce --- /dev/null +++ b/include/plssvm/mpi/detail/information.hpp @@ -0,0 +1,56 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Functions to gather MPI rank specific information on the main rank and print them out. 
+ */ + +#ifndef PLSSVM_MPI_DETAIL_INFORMATION_HPP_ +#define PLSSVM_MPI_DETAIL_INFORMATION_HPP_ +#pragma once + +#include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/solver_types.hpp" // plssvm::solver_type +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include // std::optional, std::nullopt +#include // std::string +#include // std::vector + +namespace plssvm::mpi::detail { + +/** + * @brief Communicate the @p solver from each MPI rank in @p comm to @p comm's main rank and outputs the result to the console. + * @details Only outputs the content on the main MPI rank! + * @param[in] comm the communicator to gather the solver information from + * @param[in] rank_solver the solver type used on the current MPI rank, gathered on the main MPI rank + */ +void gather_and_print_solver_information(const communicator &comm, solver_type rank_solver); + +/** + * @brief Communicate the CSVM information, including the used backend, target platform, and device names, from each MPI rank in @p comm to @p comm's main rank and outputs the result to the console. 
+ * @param[in] comm the communicator to gather the CSVM information from + * @param[in] rank_backend the backend used on the current MPI rank, gathered on the main MPI rank + * @param[in] rank_target the target platform used on the current MPI rank, gathered on the main MPI rank + * @param[in] rank_devices the device (names) used on the current MPI rank, gathered on the main MPI rank + * @param[in] additional_info optional additional information used on the current MPI rank, gathered on the main MPI rank + */ +void gather_and_print_csvm_information(const communicator &comm, backend_type rank_backend, target_platform rank_target, const std::vector &rank_devices, const std::optional &additional_info = std::nullopt); + +/** + * @brief Communicate the CSVM information, including the used backend, target platform, and device names, from each MPI rank in @p comm to @p comm's main rank and outputs the result to the console. + * @param[in] comm the communicator to gather the CSVM information from + * @param[in] rank_backend the backend used on the current MPI rank, gathered on the main MPI rank + * @param[in] rank_target the target platform used on the current MPI rank, gathered on the main MPI rank + * @param[in] additional_info optional additional information used on the current MPI rank, gathered on the main MPI rank + */ +void gather_and_print_csvm_information(const communicator &comm, backend_type rank_backend, target_platform rank_target, const std::optional &additional_info = std::nullopt); + +} // namespace plssvm::mpi::detail + +#endif // PLSSVM_MPI_DETAIL_INFORMATION_HPP_ diff --git a/include/plssvm/mpi/detail/mpi_datatype.hpp b/include/plssvm/mpi/detail/mpi_datatype.hpp new file mode 100644 index 000000000..c9efa2e43 --- /dev/null +++ b/include/plssvm/mpi/detail/mpi_datatype.hpp @@ -0,0 +1,96 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is 
part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Convert a provided type to the corresponding MPI_Datatype, if possible. + */ + +#ifndef PLSSVM_MPI_DETAIL_MPI_DATATYPE_HPP_ +#define PLSSVM_MPI_DETAIL_MPI_DATATYPE_HPP_ +#pragma once + +#if defined(PLSSVM_HAS_MPI_ENABLED) + + #include "mpi.h" // MPI_Datatype, various MPI datatypes + + #include // std::complex + #include // std::enable_if_t, std::is_enum_v, std::underlying_type_t + + /** + * @def PLSSVM_CREATE_MPI_DATATYPE_MAPPING + * @brief Defines a macro to create all possible conversion from a C++ type to a MPI_Datatype. + * @param[in] cpp_type the C++ type + * @param[in] mpi_type the corresponding MPI_Datatype + */ + #define PLSSVM_CREATE_MPI_DATATYPE_MAPPING(cpp_type, mpi_type) \ + template <> \ + [[nodiscard]] inline MPI_Datatype mpi_datatype() { return mpi_type; } + +namespace plssvm::mpi::detail { + +/** + * @brief Tries to convert the given C++ type to its corresponding MPI_Datatype. + * @details The definition is marked as **deleted** if `T` isn't representable as [`MPI_Datatype`](https://www.mpi-forum.org/docs/mpi-2.2/mpi22-report/node44.htm) or an enum. 
+ * @tparam T the type to convert to a MPI_Datatype + * @return the corresponding MPI_Datatype (`[[nodiscard]]`) + */ +template , bool> = true> +[[nodiscard]] inline MPI_Datatype mpi_datatype() = delete; + +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(bool, MPI_C_BOOL) + +// character types +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(char, MPI_CHAR) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(signed char, MPI_SIGNED_CHAR) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(unsigned char, MPI_UNSIGNED_CHAR) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(wchar_t, MPI_WCHAR) + +// integer types +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(signed short, MPI_SHORT) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(unsigned short, MPI_UNSIGNED_SHORT) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(signed int, MPI_INT) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(unsigned int, MPI_UNSIGNED) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(signed long int, MPI_LONG) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(unsigned long int, MPI_UNSIGNED_LONG) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(signed long long int, MPI_LONG_LONG) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(unsigned long long int, MPI_UNSIGNED_LONG_LONG) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::int8_t, MPI_INT8_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::int16_t, MPI_INT16_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::int32_t, MPI_INT32_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::int64_t, MPI_INT64_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::uint8_t, MPI_UINT8_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::uint16_t, MPI_UINT16_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::uint32_t, MPI_UINT32_T) +// PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::uint64_t, MPI_UINT64_T) + +// floating point types +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(float, MPI_FLOAT) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(double, MPI_DOUBLE) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(long double, MPI_LONG_DOUBLE) + +// complex types +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::complex, MPI_C_COMPLEX) +PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::complex, MPI_C_DOUBLE_COMPLEX) 
+PLSSVM_CREATE_MPI_DATATYPE_MAPPING(std::complex, MPI_C_LONG_DOUBLE_COMPLEX) + +/** + * @brief Specialization for enums: for enums, use their underlying type in MPI communications. + * @tparam T the enum type to convert to a MPI_Datatype + * @return the corresponding MPI_Datatype (`[[nodiscard]]`) + */ +template , bool> = true> +[[nodiscard]] inline MPI_Datatype mpi_datatype() { + return mpi_datatype>(); +} + +} // namespace plssvm::mpi::detail + + #undef PLSSVM_CREATE_MPI_DATATYPE_MAPPING + +#endif + +#endif // PLSSVM_MPI_DETAIL_MPI_DATATYPE_HPP_ diff --git a/include/plssvm/mpi/detail/utility.hpp b/include/plssvm/mpi/detail/utility.hpp new file mode 100644 index 000000000..80c2f6d84 --- /dev/null +++ b/include/plssvm/mpi/detail/utility.hpp @@ -0,0 +1,45 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines some utility functions for our optional MPI usage. + */ + +#ifndef PLSSVM_MPI_DETAIL_UTILITY_HPP_ +#define PLSSVM_MPI_DETAIL_UTILITY_HPP_ +#pragma once + +#include // std::string + +/** + * @def PLSSVM_MPI_ERROR_CHECK + * @brief Check the MPI error @p err. If @p err signals an error, throw a plssvm::mpi_exception. + * @throws plssvm::mpi_exception if the error code signals a failure + */ +#if defined(PLSSVM_HAS_MPI_ENABLED) + #define PLSSVM_MPI_ERROR_CHECK(err) plssvm::mpi::detail::mpi_error_check(err) +#else + #define PLSSVM_MPI_ERROR_CHECK(...) +#endif + +namespace plssvm::mpi::detail { + +/** + * @brief Checks whether @p err is equal to `MPI_SUCCESS`. If this is not the case, throws an exception. + * @param[in] err the error code to check + */ +void mpi_error_check(int err); + +/** + * @brief Get the current processor name. 
+ * @return the processor name (`[[nodiscard]]`) + */ +[[nodiscard]] std::string node_name(); + +} // namespace plssvm::mpi::detail + +#endif // PLSSVM_MPI_DETAIL_UTILITY_HPP_ diff --git a/include/plssvm/mpi/detail/version.hpp b/include/plssvm/mpi/detail/version.hpp new file mode 100644 index 000000000..2fb5aff69 --- /dev/null +++ b/include/plssvm/mpi/detail/version.hpp @@ -0,0 +1,33 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines some version functions for our optional MPI usage. + */ + +#ifndef PLSSVM_MPI_DETAIL_VERSION_HPP_ +#define PLSSVM_MPI_DETAIL_VERSION_HPP_ + +#include // std::string + +namespace plssvm::mpi::detail { + +/** + * @brief Get the used MPI library version. + * @return the MPI library version (`[[nodiscard]]`) + */ +[[nodiscard]] std::string mpi_library_version(); + +/** + * @brief Get the used MPI version. + * @return the MPI version (`[[nodiscard]]`) + */ +[[nodiscard]] std::string mpi_version(); + +} // namespace plssvm::mpi::detail + +#endif // PLSSVM_MPI_DETAIL_VERSION_HPP_ diff --git a/include/plssvm/mpi/environment.hpp b/include/plssvm/mpi/environment.hpp new file mode 100644 index 000000000..49a0d8559 --- /dev/null +++ b/include/plssvm/mpi/environment.hpp @@ -0,0 +1,71 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines some wrapper functions around MPI specific environment functions. 
+ * @details These wrapper functions are only conditionally compiled such that MPI is still an **optional** dependency in PLSSVM. + */ + +#ifndef PLSSVM_MPI_ENVIRONMENT_HPP_ +#define PLSSVM_MPI_ENVIRONMENT_HPP_ +#pragma once + +namespace plssvm::mpi { + +/** + * @brief Initialize the MPI environment. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, does nothing. + */ +void init(); +/** + * @brief Initialize the MPI environment with the provided command line arguments. + * @param[in,out] argc the number of command line arguments + * @param[in,out] argv the values of the command line arguments + */ +void init(int &argc, char **argv); + +/** + * @brief Finalize the MPI environment. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, does nothing. + */ +void finalize(); +/** + * @brief Abort the MPI environment associated with `MPI_COMM_WORLD`. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, calls std::abort. + */ +void abort_world(); + +/** + * @brief Check if the MPI environment has been successfully initialized. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns `true`. + * @return `true` if the environment was successfully initialized, otherwise `false` (`[[nodiscard]]`) + */ +[[nodiscard]] bool is_initialized(); +/** + * @brief Check if the MPI environment has been successfully finalized. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns `true`. + * @return `true` if the environment was successfully finalized, otherwise `false` (`[[nodiscard]]`) + */ +[[nodiscard]] bool is_finalized(); +/** + * @brief Check if the MPI environment is currently active, i.e., `init` has already been called, but not `finalize`. + * @details If `PLSSVM_HAS_MPI_ENABLED` is undefined, returns `false`. + * @return `true` if the environment is currently active, otherwise `false` (`[[nodiscard]]`) + */ +[[nodiscard]] bool is_active(); + +/** + * @brief Returns `true` if the executable was started via `mpirun`. 
+ * @details Checks for the existence of the environment variables: `OMPI_COMM_WORLD_SIZE`, `PMI_SIZE`, or `SLURM_PROCID`. + * @note Will falsely return `true` if any of the environment variables is explicitly set by the user! + * @return `true` if the executable was started via `mpirun`, otherwise `false` (`[[nodiscard]]`) + */ +[[nodiscard]] bool is_executed_via_mpirun(); + +} // namespace plssvm::mpi + +#endif // PLSSVM_MPI_ENVIRONMENT_HPP_ diff --git a/include/plssvm/parameter.hpp b/include/plssvm/parameter.hpp index 25df3ff8f..378c3d3ea 100644 --- a/include/plssvm/parameter.hpp +++ b/include/plssvm/parameter.hpp @@ -13,13 +13,15 @@ #define PLSSVM_PARAMETER_HPP_ #pragma once -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/igor_utility.hpp" // plssvm::detail::{has_only_named_args_v, get_value_from_named_parameter} -#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked -#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::{remove_cvref_t, always_false_v} -#include "plssvm/gamma.hpp" // plssvm::gamma_type -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type, plssvm::kernel_function_type_to_math_string -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/igor_utility.hpp" // plssvm::detail::{has_only_named_args_v, get_value_from_named_parameter} +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::{remove_cvref_t, always_false_v} +#include "plssvm/detail/utility.hpp" // plssvm::detail::to_underlying +#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception +#include "plssvm/gamma.hpp" // plssvm::gamma_type +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type, 
plssvm::kernel_function_type_to_math_string +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity #include "fmt/base.h" // fmt::formatter #include "fmt/format.h" // fmt::format @@ -29,6 +31,7 @@ #include // forward declare std::ostream and std::istream #include // std::string_view #include // std::forward +#include // std::variant, std::holds_alternative, std::get namespace plssvm { @@ -117,12 +120,14 @@ struct parameter { * @param[in] coef0_p the coef0 used in the polynomial kernel function * @param[in] cost_p the cost used in all kernel functions */ - constexpr parameter(const kernel_function_type kernel_p, const int degree_p, const gamma_type gamma_p, const real_type coef0_p, const real_type cost_p) noexcept : + parameter(const kernel_function_type kernel_p, const int degree_p, const gamma_type gamma_p, const real_type coef0_p, const real_type cost_p) : kernel_type{ kernel_p }, degree{ degree_p }, gamma{ gamma_p }, coef0{ coef0_p }, cost{ cost_p } { + // sanity check the provided parameter values + this->sanity_check_parameter(); } /** @@ -132,9 +137,11 @@ struct parameter { * @param[in] named_args the potential named-parameters */ template )> - constexpr explicit parameter(const parameter ¶ms, Args &&...named_args) : + explicit parameter(const parameter ¶ms, Args &&...named_args) : parameter{ params } { this->set_named_arguments(std::forward(named_args)...); + // sanity check the provided parameter values + this->sanity_check_parameter(); } /** @@ -143,8 +150,10 @@ struct parameter { * @param[in] named_args the potential named-parameters */ template )> - constexpr explicit parameter(Args &&...named_args) noexcept { + constexpr explicit parameter(Args &&...named_args) { this->set_named_arguments(std::forward(named_args)...); + // sanity check the provided parameter values + this->sanity_check_parameter(); } /// The used kernel function: linear, polynomial, radial basis functions (rbf), sigmoid, laplacian, or chi-squared. 
@@ -260,6 +269,36 @@ struct parameter { cost = detail::get_value_from_named_parameter(parser, plssvm::cost); } } + + /** + * @brief Perform some sanity checks on the passed parameters. + * @throws plssvm::invalid_parameter_exception if the kernel function is invalid + * @throws plssvm::invalid_parameter_exception if the gamma value for the polynomial or radial basis function kernel is **not** greater than zero + */ + void sanity_check_parameter() const { + // kernel: valid kernel function + const auto kernel_type_value = detail::to_underlying(kernel_type); + if (kernel_type_value < 0 || kernel_type_value >= 6) { + throw invalid_parameter_exception{ fmt::format("Invalid kernel function with value {} given!", kernel_type_value) }; + } + + // degree: must be greater or equal than 0 + if (kernel_type == kernel_function_type::polynomial && degree < 0) { + throw invalid_parameter_exception{ fmt::format("degree must be non-negative, but is {}!", degree) }; + } + + // gamma: must be greater or equal than 0 IF explicitly provided as real_type (not for the linear kernel) + if (kernel_type != kernel_function_type::linear && std::holds_alternative(gamma) && std::get(gamma) < real_type{ 0.0 }) { + throw invalid_parameter_exception{ fmt::format("gamma must be non-negative, but is {}!", std::get(gamma)) }; + } + + // coef0: all allowed + + // cost: must be greater than 0 + if (cost <= real_type{ 0.0 }) { + throw invalid_parameter_exception{ fmt::format("cost must be strictly-positive, but is {}!", cost) }; + } + } }; /** diff --git a/include/plssvm/svm/csvc.hpp b/include/plssvm/svm/csvc.hpp index db4359e25..5a1de4ff5 100644 --- a/include/plssvm/svm/csvc.hpp +++ b/include/plssvm/svm/csvc.hpp @@ -18,10 +18,11 @@ #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/igor_utility.hpp" // plssvm::detail::{has_only_named_args_v, get_value_from_named_parameter} -#include 
"plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT, plssvm::detail::tracking::tracking_entry #include "plssvm/detail/utility.hpp" // plssvm::detail::contains -#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception +#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception, plssvm::mpi_exception #include "plssvm/gamma.hpp" // plssvm::calculate_gamma_value #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix @@ -37,7 +38,7 @@ #include // std::chrono::{time_point, steady_clock, duration_cast, milliseconds} #include // std::size_t #include // std::numeric_limits::lowest -#include // std::make_shared, std::dynamic_pointer_cast, std::addressof +#include // std::make_shared, std::addressof #include // std::make_optional #include // std::tie #include // std::is_same_v @@ -114,6 +115,7 @@ class csvc : virtual public csvm { * @throws plssvm::invlaid_parameter_exception if the provided maximum number of iterations is less or equal than zero * @throws plssvm::invalid_parameter_exception if the training @p data does **not** include labels * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::solve_lssvm_system_of_linear_equations` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVC and the MPI communicator of the @p data set are not identical * @note For binary classification **always** one vs. all is used regardless of the provided parameter! 
* @return the learned model (`[[nodiscard]]`) */ @@ -134,6 +136,10 @@ class csvc : virtual public csvm { if (!data.has_labels()) { throw invalid_parameter_exception{ "No labels given for training! Maybe the data is only usable for prediction?" }; } + // check whether the C-SVC and data set MPI communicators are identical + if (comm_ != data.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVC and data set must be identical!" }; + } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT("fit start"); @@ -159,10 +165,11 @@ class csvc : virtual public csvm { // start fitting the data set using a C-SVM const std::chrono::time_point start_time = std::chrono::steady_clock::now(); - detail::log(verbosity_level::full, - "Using {} ({}) as multi-class classification strategy.\n", - used_classification, - classification_type_to_full_string(used_classification)); + detail::log_untracked(verbosity_level::full, + comm_, + "Using {} ({}) as multi-class classification strategy.\n", + used_classification, + classification_type_to_full_string(used_classification)); // copy parameter and set gamma if necessary parameter params{ params_ }; @@ -200,10 +207,11 @@ class csvc : virtual public csvm { if (num_classes == 2) { // special optimization for binary case (no temporary copies necessary) - detail::log(verbosity_level::full, - "\nClassifying 0 vs 1 ({} vs {}) (1/1):\n", - data.mapping_->get_label_by_mapped_index(0), - data.mapping_->get_label_by_mapped_index(1)); + detail::log_untracked(verbosity_level::full, + comm_, + "\nClassifying 0 vs 1 ({} vs {}) (1/1):\n", + data.mapping_->get_label_by_mapped_index(0), + data.mapping_->get_label_by_mapped_index(1)); // reduce the size of the rhs (y_ptr) // -> consistent with the multi-class case as well as when reading the model from file in the model class constructor @@ -243,14 +251,15 @@ class csvc : virtual public csvm { } // solve the minimization problem -> note that only a single rhs is present - 
detail::log(verbosity_level::full, - "\nClassifying {} vs {} ({} vs {}) ({}/{}):\n", - i, - j, - data.mapping_->get_label_by_mapped_index(i), - data.mapping_->get_label_by_mapped_index(j), - pos + 1, - calculate_number_of_classifiers(classification_type::oao, num_classes)); + detail::log_untracked(verbosity_level::full, + comm_, + "\nClassifying {} vs {} ({} vs {}) ({}/{}):\n", + i, + j, + data.mapping_->get_label_by_mapped_index(i), + data.mapping_->get_label_by_mapped_index(j), + pos + 1, + calculate_number_of_classifiers(classification_type::oao, num_classes)); const auto &[alpha, rho, num_iter] = this->solve_lssvm_system_of_linear_equations(binary_data, binary_y, params, std::forward(named_args)...); (*csvc_model.alpha_ptr_)[pos] = std::move(alpha); (*csvc_model.rho_ptr_)[pos] = rho.front(); // prevents std::tie @@ -270,6 +279,7 @@ class csvc : virtual public csvm { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "\nLearned the SVC classifier for {} multi-class classification in {}.\n\n", classification_type_to_full_string(used_classification), detail::tracking::tracking_entry{ "cg", "total_runtime", std::chrono::duration_cast(end_time - start_time) }); @@ -290,6 +300,8 @@ class csvc : virtual public csvm { * @param[in] data the data to predict the labels for * @throws plssvm::invalid_parameter_exception if the number of features in the @p model's support vectors don't match the number of features in the @p data set * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::predict_values` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVC and the MPI communicator of the @p model are not identical + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVC and the MPI communicator of the @p data set are not identical * @return the predicted labels (`[[nodiscard]]`) */ template @@
-314,6 +326,14 @@ class csvc : virtual public csvm { if (model.num_features() != data.num_features()) { throw invalid_parameter_exception{ fmt::format("Number of features per data point ({}) must match the number of features per support vector of the provided model ({})!", data.num_features(), model.num_features()) }; } + // check whether the C-SVC and model MPI communicators are identical + if (comm_ != model.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVC and model must be identical!" }; + } + // check whether the C-SVC and data set MPI communicators are identical + if (comm_ != data.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVC and data set must be identical!" }; + } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT("predict start"); @@ -338,8 +358,11 @@ class csvc : virtual public csvm { PLSSVM_ASSERT(votes.num_rows() == data.num_data_points(), "The number of votes ({}) must be equal the number of data points ({})!", votes.num_rows(), data.num_data_points()); PLSSVM_ASSERT(votes.num_cols() == calculate_number_of_classifiers(classification_type::oaa, model.num_classes()), "The votes contain {} values, but must contain {} values!", votes.num_cols(), calculate_number_of_classifiers(classification_type::oaa, model.num_classes())); + // extract mapping + const auto &mapping = *dynamic_cast &>(*model.data_).mapping_; + // use voting -#pragma omp parallel for default(none) shared(predicted_labels, votes, model) if (!std::is_same_v) +#pragma omp parallel for default(none) shared(predicted_labels, votes, mapping) if (!std::is_same_v) for (std::size_t i = 0; i < predicted_labels.size(); ++i) { std::size_t argmax = 0; real_type max = std::numeric_limits::lowest(); @@ -349,7 +372,7 @@ class csvc : virtual public csvm { max = votes(i, v); } } - predicted_labels[i] = std::dynamic_pointer_cast>(model.data_)->mapping_->get_label_by_mapped_index(argmax); + predicted_labels[i] = 
mapping.get_label_by_mapped_index(argmax); } } else if (model.get_classification_type() == classification_type::oao) { PLSSVM_ASSERT(model.index_sets_ptr_ != nullptr, "The index_sets_ptr_ may never be a nullptr!"); @@ -456,8 +479,11 @@ class csvc : virtual public csvm { } } + // extract mapping + const auto &mapping = *dynamic_cast &>(*model.data_).mapping_; + // map majority vote to predicted class -#pragma omp parallel for default(none) shared(predicted_labels, class_votes, model) if (!std::is_same_v) +#pragma omp parallel for default(none) shared(predicted_labels, class_votes, mapping) if (!std::is_same_v) for (std::size_t i = 0; i < predicted_labels.size(); ++i) { std::size_t argmax = 0; std::size_t max = 0; @@ -467,7 +493,7 @@ class csvc : virtual public csvm { max = class_votes(i, v); } } - predicted_labels[i] = std::dynamic_pointer_cast>(model.data_)->mapping_->get_label_by_mapped_index(argmax); + predicted_labels[i] = mapping.get_label_by_mapped_index(argmax); } } @@ -482,6 +508,7 @@ class csvc : virtual public csvm { * @tparam label_type the type of the label (an arithmetic type or `std::string`) * @param[in] model a previously learned model * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::predict_values` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVC and the MPI communicator of the @p model are not identical * @return the accuracy of the model (`[[nodiscard]]`) */ template @@ -498,6 +525,8 @@ class csvc : virtual public csvm { * @throws plssvm::invalid_parameter_exception if the @p data to score has no labels * @throws plssvm::invalid_parameter_exception if the number of features in the @p model's support vectors don't match the number of features in the @p data set * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::predict_values` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVC and the
MPI communicator of the @p model are not identical + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVC and the MPI communicator of the @p data set are not identical + * @return the accuracy of the labeled @p data (`[[nodiscard]]`) */ template @@ -510,6 +539,14 @@ class csvc : virtual public csvm { if (model.num_features() != data.num_features()) { throw invalid_parameter_exception{ fmt::format("Number of features per data point ({}) must match the number of features per support vector of the provided model ({})!", data.num_features(), model.num_features()) }; } + // check whether the C-SVC and model MPI communicators are identical + if (comm_ != model.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVC and model must be identical!" }; + } + // check whether the C-SVC and data set MPI communicators are identical + if (comm_ != data.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVC and data set must be identical!" 
}; + } // predict labels const std::vector predicted_labels = this->predict(model, data); diff --git a/include/plssvm/svm/csvm.hpp b/include/plssvm/svm/csvm.hpp index cbe25163c..1acd4738e 100644 --- a/include/plssvm/svm/csvm.hpp +++ b/include/plssvm/svm/csvm.hpp @@ -18,13 +18,16 @@ #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::data_distribution #include "plssvm/detail/igor_utility.hpp" // plssvm::detail::{get_value_from_named_parameter, has_only_parameter_named_args_v} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT, plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES, plssvm::detail::remove_cvref_t #include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_solver_information #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/solver_types.hpp" // plssvm::solver_type @@ -32,6 +35,7 @@ #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include "igor/igor.hpp" // igor::parser #include // std::max @@ -53,19 +57,26 @@ namespace plssvm { */ 
class csvm { public: + /** + * @brief Default constructor. + * @details Needed due to multiple-inheritance. + */ + csvm() = default; /** * @brief Construct a C-SVM using the SVM parameter @p params. * @details Uses the default SVM parameter if none are provided. + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameter */ - explicit csvm(parameter params = {}); + explicit csvm(mpi::communicator comm, parameter params = {}); /** * @brief Construct a C-SVM forwarding all parameters @p args to the plssvm::parameter constructor. * @tparam Args the type of the (named-)parameters + * @param[in] comm the used MPI communicator * @param[in] args the parameters used to construct a plssvm::parameter */ template - explicit csvm(Args &&...args); + explicit csvm(mpi::communicator comm, Args &&...args); /** * @brief Delete copy-constructor since a C-SVM is a move-only type. @@ -122,6 +133,14 @@ class csvm { template )> void set_params(Args &&...named_args); + /** + * @brief Get the associated MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const mpi::communicator &communicator() const noexcept { + return comm_; + } + protected: //*************************************************************************************************************************************// // pure virtual functions, must be implemented for all subclasses; doing the actual work // @@ -178,18 +197,6 @@ class csvm { */ [[nodiscard]] virtual aos_matrix predict_values(const parameter ¶ms, const soa_matrix &support_vectors, const aos_matrix &alpha, const std::vector &rho, soa_matrix &w, const soa_matrix &predict_points) const = 0; - /// The target platform of this SVM. - target_platform target_{ plssvm::target_platform::automatic }; - /// The data distribution on the available devices. - mutable std::unique_ptr data_distribution_{}; - - /** - * @brief Perform some sanity checks on the passed SVM parameters. 
- * @throws plssvm::invalid_parameter_exception if the kernel function is invalid - * @throws plssvm::invalid_parameter_exception if the gamma value for the polynomial or radial basis function kernel is **not** greater than zero - */ - void sanity_check_parameter() const; - /** * @brief Solve the system of linear equations `K * X = B` where `K` is the kernel matrix assembled from @p A using the @p params with potentially multiple right-hand sides. * @tparam Args the type of the potential additional parameters @@ -235,17 +242,23 @@ class csvm { /// The SVM parameter (e.g., cost, degree, gamma, coef0) currently in use. parameter params_{}; + /// The target platform of this SVM. + target_platform target_{ plssvm::target_platform::automatic }; + /// The data distribution on the available devices. + mutable std::unique_ptr data_distribution_{}; + /// The used MPI communicator. + mpi::communicator comm_{}; }; -inline csvm::csvm(parameter params) : - params_{ params } { - this->sanity_check_parameter(); +inline csvm::csvm(mpi::communicator comm, parameter params) : + params_{ params }, + comm_{ std::move(comm) } { } template -csvm::csvm(Args &&...named_args) : - params_{ std::forward(named_args)... } { - this->sanity_check_parameter(); +csvm::csvm(mpi::communicator comm, Args &&...named_args) : + params_{ std::forward(named_args)... 
}, + comm_{ std::move(comm) } { } template , bool>> @@ -254,9 +267,6 @@ void csvm::set_params(Args &&...named_args) { // update the parameters params_.set_named_arguments(std::forward(named_args)...); - - // check if the new parameters make sense - this->sanity_check_parameter(); } //*************************************************************************************************************************************// @@ -324,6 +334,9 @@ std::tuple, std::vector, std::vector, std::vector, std::vectornum_available_devices() }; + const detail::triangular_data_distribution data_distribution{ comm_, num_rows_reduced, this->num_available_devices() }; const std::vector total_memory_needed_explicit_per_device = data_distribution.calculate_maximum_explicit_kernel_matrix_memory_needed_per_place(num_features, num_rhs); const std::vector total_memory_needed_implicit_per_device = data_distribution.calculate_maximum_implicit_kernel_matrix_memory_needed_per_place(num_features, num_rhs); @@ -355,23 +368,26 @@ std::tuple, std::vector, std::vector(percentual_safety_margin * 100.0L), - minimal_safety_margin, - detail::tracking::tracking_entry{ "solver", "system_memory", total_system_memory }, - detail::tracking::tracking_entry{ "solver", "usable_system_memory_with_safety_margin", usable_system_memory }, - format_vector(total_device_memory_per_device), - format_vector(usable_device_memory_per_device), - format_vector(total_memory_needed_explicit_per_device), - format_vector(total_memory_needed_implicit_per_device)); + if (comm_.size() <= 1) { + // output the necessary information on the console, full output only if a single MPI rank is used + detail::log(verbosity_level::full, + comm_, + "Determining the solver type based on the available memory:\n" + " - total system memory: {2}\n" + " - usable system memory (with safety margin of min({0} %, {1}): {3}\n" + " - total device memory: {4}\n" + " - usable device memory (with safety margin of min({0} %, {1}): {5}\n" + " - maximum memory 
needed (cg_explicit): {6}\n" + " - maximum memory needed (cg_implicit): {7}\n", + static_cast(percentual_safety_margin * 100.0L), + minimal_safety_margin, + detail::tracking::tracking_entry{ "solver", "system_memory", total_system_memory }, + detail::tracking::tracking_entry{ "solver", "usable_system_memory_with_safety_margin", usable_system_memory }, + format_vector(total_device_memory_per_device), + format_vector(usable_device_memory_per_device), + format_vector(total_memory_needed_explicit_per_device), + format_vector(total_memory_needed_implicit_per_device)); + } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "solver", "device_memory", total_device_memory_per_device })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "solver", "usable_device_memory_with_safety_margin", usable_device_memory_per_device })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "solver", "needed_device_memory_cg_explicit", total_memory_needed_explicit_per_device })); @@ -395,7 +411,13 @@ std::tuple, std::vector, std::vector failed_cg_implicit_constraints = check_sizes(total_memory_needed_implicit_per_device, usable_device_memory_per_device); failed_cg_implicit_constraints.empty()) { @@ -417,13 +439,17 @@ std::tuple, std::vector, std::vector max_single_allocation_cg_implicit_size_per_device = data_distribution.calculate_maximum_implicit_kernel_matrix_memory_allocation_size_per_place(num_features, num_rhs); // output the maximum memory allocation size per device - detail::log(verbosity_level::full, - " - maximum supported single memory allocation size: {}\n" - " - maximum needed single memory allocation size (cg_explicit): {}\n" - " - maximum needed single memory allocation size (cg_implicit): {}\n", - format_vector(max_mem_alloc_size_per_device), - format_vector(max_single_allocation_cg_explicit_size_per_device), - 
format_vector(max_single_allocation_cg_implicit_size_per_device)); + if (comm_.size() <= 1) { + // output only if a single MPI rank is used + detail::log_untracked(verbosity_level::full, + comm_, + " - maximum supported single memory allocation size: {}\n" + " - maximum needed single memory allocation size (cg_explicit): {}\n" + " - maximum needed single memory allocation size (cg_implicit): {}\n", + format_vector(max_mem_alloc_size_per_device), + format_vector(max_single_allocation_cg_explicit_size_per_device), + format_vector(max_single_allocation_cg_implicit_size_per_device)); + } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "solver", "device_max_single_mem_alloc_size", max_mem_alloc_size_per_device })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "solver", "device_max_mem_alloc_size_cg_explicit", max_single_allocation_cg_explicit_size_per_device })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "solver", "device_max_mem_alloc_size_cg_implicit", max_single_allocation_cg_implicit_size_per_device })); @@ -433,26 +459,42 @@ std::tuple, std::vector, std::vector failed_cg_explicit_constraints = check_sizes(max_single_allocation_cg_explicit_size_per_device, max_mem_alloc_size_per_device); used_solver == solver_type::cg_explicit && !failed_cg_explicit_constraints.empty()) { // max mem alloc size constraints not fulfilled - detail::log(verbosity_level::full, - "Cannot use cg_explicit due to maximum single memory allocation constraints on device(s) {}! Falling back to cg_implicit.\n", - format_vector(failed_cg_explicit_constraints)); + if (comm_.size() <= 1) { + // output only if a single MPI rank is used + detail::log_untracked(verbosity_level::full, + comm_, + "Cannot use cg_explicit due to maximum single memory allocation constraints on device(s) {}! 
Falling back to cg_implicit.\n", + format_vector(failed_cg_explicit_constraints)); + } // can't use cg_explicit used_solver = solver_type::cg_implicit; } if (const std::vector failed_cg_implicit_constraints = check_sizes(max_single_allocation_cg_implicit_size_per_device, max_mem_alloc_size_per_device); used_solver == solver_type::cg_implicit && !failed_cg_implicit_constraints.empty()) { // can't fulfill maximum single memory allocation size even for cg_implicit - plssvm::detail::log(verbosity_level::full | verbosity_level::warning, - "WARNING: if you are sure that the guaranteed maximum memory allocation size can be safely ignored on your device, " - "this check can be disabled via \"-DPLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE=OFF\" during the CMake configuration!\n"); + if (comm_.size() <= 1) { + // output only if a single MPI rank is used + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm_, + "WARNING: if you are sure that the guaranteed maximum memory allocation size can be safely ignored on your device, " + "this check can be disabled via \"-DPLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE=OFF\" during the CMake configuration!\n"); + } throw kernel_launch_resources{ fmt::format("Can't fulfill maximum single memory allocation constraint for device(s) {} even for the cg_implicit solver!", format_vector(failed_cg_implicit_constraints)) }; } #endif } - detail::log(verbosity_level::full, - "Using {} as solver for AX=B.\n\n", - detail::tracking::tracking_entry{ "solver", "solver_type", used_solver }); + if (comm_.size() <= 1) { + // output only if a single MPI rank is used + detail::log_untracked(verbosity_level::full, + comm_, + "Using {} as solver for AX=B.\n\n", + used_solver); + } else { + // multiple MPI ranks are used -> output used solver type in a more condensed way + mpi::detail::gather_and_print_solver_information(comm_, used_solver); + } + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ 
"solver", "solver_type", used_solver })); // perform dimensional reduction // note: structured binding is rejected by clang HIP compiler! @@ -478,9 +520,22 @@ std::tuple, std::vector, std::vector(assembly_end_time - assembly_start_time); if (used_solver != solver_type::cg_implicit) { - detail::log(verbosity_level::full | verbosity_level::timing, - "Assembled the kernel matrix in {}.\n", - assembly_duration); + if (comm_.size() > 1) { + // gather kernel matrix assembly runtimes from each MPI rank + const std::vector durations = comm_.gather(assembly_duration); + + detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "Assembled the kernel matrix in {} ({}).\n", + *std::max_element(durations.cbegin(), durations.cend()), + fmt::join(durations, "|")); + + } else { + detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "Assembled the kernel matrix in {}.\n", + assembly_duration); + } } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "kernel_matrix", "kernel_matrix_assembly", assembly_duration })); diff --git a/include/plssvm/svm/csvr.hpp b/include/plssvm/svm/csvr.hpp index eebb0b429..b35d55cfb 100644 --- a/include/plssvm/svm/csvr.hpp +++ b/include/plssvm/svm/csvr.hpp @@ -16,9 +16,9 @@ #include "plssvm/constants.hpp" // plssvm::PADDING_SIZE, plssvm::real_type #include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT, plssvm::detail::tracking::tracking_entry -#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception +#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception, plssvm::mpi_exception 
#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix #include "plssvm/model/regression_model.hpp" // plssvm::regression_model @@ -104,6 +104,7 @@ class csvr : virtual public csvm { * @throws plssvm::invlaid_parameter_exception if the provided maximum number of iterations is less or equal than zero * @throws plssvm::invalid_parameter_exception if the training @p data does **not** include labels * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::solve_lssvm_system_of_linear_equations` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVR and the MPI communicator of the @p data set are not identical * @note For binary classification **always** one vs. all is used regardless of the provided parameter! * @return the learned model (`[[nodiscard]]`) */ @@ -124,6 +125,10 @@ class csvr : virtual public csvm { if (!data.has_labels()) { throw invalid_parameter_exception{ "No labels given for training! Maybe the data is only usable for prediction?" }; } + // check whether the C-SVR and data set MPI communicators are identical + if (comm_ != data.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVR and data set must be identical!" 
}; + } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT("fit start"); @@ -159,6 +164,7 @@ class csvr : virtual public csvm { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "\nLearned the SVR classifier for regression in {}.\n\n", detail::tracking::tracking_entry{ "cg", "total_runtime", std::chrono::duration_cast(end_time - start_time) }); @@ -177,6 +183,8 @@ class csvr : virtual public csvm { * @param[in] data the data to predict the labels for * @throws plssvm::invalid_parameter_exception if the number of features in the @p model's support vectors don't match the number of features in the @p data set * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::predict_values` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVR and the MPI communicator of the @p model set are not identical + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVR and the MPI communicator of the @p data set are not identical * @return the predicted labels (`[[nodiscard]]`) */ template @@ -201,6 +209,14 @@ class csvr : virtual public csvm { if (model.num_features() != data.num_features()) { throw invalid_parameter_exception{ fmt::format("Number of features per data point ({}) must match the number of features per support vector of the provided model ({})!", data.num_features(), model.num_features()) }; } + // check whether the C-SVR and model MPI communicators are identical + if (comm_ != model.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVR and model must be identical!" }; + } + // check whether the C-SVR and data set MPI communicators are identical + if (comm_ != data.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVR and data set must be identical!" 
}; + } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT("predict start"); @@ -245,6 +261,7 @@ class csvr : virtual public csvm { * @param[in] model a previously learned model * @throws plssvm::invalid_parameter_exception if the @p model has no labels * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::predict_values` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVR and the MPI communicator of the @p model set are not identical * @return the regression loss of the model (`[[nodiscard]]`) */ template @@ -264,6 +281,8 @@ class csvr : virtual public csvm { * @throws plssvm::invalid_parameter_exception if the @p data to score has no labels * @throws plssvm::invalid_parameter_exception if the number of features in the @p model's support vectors don't match the number of features in the @p data set * @throws plssvm::exception any exception thrown in the respective backend's implementation of `plssvm::csvm::predict_values` + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVR and the MPI communicator of the @p model set are not identical + * @throws plssvm::mpi_exception if the MPI communicator of the C-SVR and the MPI communicator of the @p data set are not identical * @return the regression loss of the labeled @p data (`[[nodiscard]]`) */ template @@ -276,6 +295,14 @@ class csvr : virtual public csvm { if (model.num_features() != data.num_features()) { throw invalid_parameter_exception{ fmt::format("Number of features per data point ({}) must match the number of features per support vector of the provided model ({})!", data.num_features(), model.num_features()) }; } + // check whether the C-SVR and model MPI communicators are identical + if (comm_ != model.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVR and model must be identical!" 
}; + } + // check whether the C-SVR and data set MPI communicators are identical + if (comm_ != data.communicator()) { + throw mpi_exception{ "The MPI communicators provided to the C-SVR and data set must be identical!" }; + } // predict labels const std::vector predicted_labels = this->predict(model, data); diff --git a/include/plssvm/verbosity_levels.hpp b/include/plssvm/verbosity_levels.hpp index 8594d3c68..54e23705e 100644 --- a/include/plssvm/verbosity_levels.hpp +++ b/include/plssvm/verbosity_levels.hpp @@ -34,7 +34,7 @@ enum class verbosity_level { timing = 0b0010, /** Log all messages related to warnings. */ warning = 0b0100, - /** Log all messages (i.e., timing, warning, and additional messages). */ + /** Log all messages (i.e., timing, warning, and additional messages except LIBSVM specific messages). */ full = 0b1000 }; diff --git a/install/python_requirements.txt b/install/python_requirements.txt index e9a7db4a2..05a1e7702 100644 --- a/install/python_requirements.txt +++ b/install/python_requirements.txt @@ -1,5 +1,8 @@ ### optional and required python packages +# for Python binding interoperability with MPI +mpi4py + ## for the data set generation (LIBSVM file format) scikit-learn humanize diff --git a/src/main_predict.cpp b/src/main_predict.cpp index b378136e2..f27ad2d2f 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -11,34 +11,59 @@ #include "plssvm/core.hpp" #include "plssvm/detail/cmd/data_set_variants.hpp" // plssvm::detail::cmd::data_set_factory #include "plssvm/detail/cmd/parser_predict.hpp" // plssvm::detail::cmd::parser_predict -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::is_executed_via_mpirun #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler #endif -#include "fmt/format.h" // fmt::print +#include "fmt/format.h" // fmt::format #include "fmt/os.h" // fmt::ostream, fmt::output_file #include "fmt/ranges.h" // fmt::join #include // std::chrono::{time_point, steady_clock, duration_cast, milliseconds}, std::chrono_literals namespace #include // EXIT_SUCCESS, EXIT_FAILURE #include // std::exception +#include // std::filesystem::path #include // std::cerr, std::endl #include // std::unique_ptr, std::make_unique +#include // std::string #include // std::string_view #include // std::remove_reference_t, std::is_same_v +#include // std::pair #include // std::visit #include // std::vector using namespace std::chrono_literals; int main(int argc, char *argv[]) { + // initialize MPI environment only via the plssvm::scope_guard (by explicitly specifying NO backend) + [[maybe_unused]] plssvm::environment::scope_guard mpi_guard{ {} }; + // create a PLSSVM communicator -> use MPI_COMM_WORLD for our executables + // if MPI is not supported, does nothing + plssvm::mpi::communicator comm{}; + +#if defined(PLSSVM_HAS_MPI_ENABLED) + plssvm::detail::log_untracked(plssvm::verbosity_level::full, + comm, + "Using {} MPI rank(s) for our C-SVM.\n", + comm.size()); +#else + if (plssvm::mpi::is_executed_via_mpirun()) { + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, + "WARNING: PLSSVM was built without MPI support, but plssvm-predict was executed via mpirun! 
" + "As a result, each MPI process will run the same code.\n"); + } +#endif + // create std::unique_ptr containing a plssvm::scope_guard // -> used to automatically handle necessary environment teardown operations std::unique_ptr environment_guard{}; @@ -54,20 +79,30 @@ int main(int argc, char *argv[]) { #endif // parse SVM parameter from command line - const plssvm::detail::cmd::parser_predict cmd_parser{ argc, argv }; + const plssvm::detail::cmd::parser_predict cmd_parser{ comm, argc, argv }; + + // add MPI related tracking entries + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "mpi", "", comm })); // send warning if the build type is release and assertions are enabled if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) { - plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, - "WARNING: The build type is set to Release, but assertions are enabled. " - "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, + "WARNING: The build type is set to Release, but assertions are enabled. 
" + "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); } // output used parameter plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\ntask: prediction\n{}\n", plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser }); + // update the load balancing weights if they were provided + if (!cmd_parser.mpi_load_balancing_weights.empty()) { + comm.set_load_balancing_weights(cmd_parser.mpi_load_balancing_weights); + } + // create data set const auto data_set_visitor = [&](auto &&data) { using label_type = typename std::remove_reference_t::label_type; @@ -94,48 +129,51 @@ int main(int argc, char *argv[]) { // create default csvm const std::unique_ptr svm = [&]() { if (use_sycl_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type); } else if (use_kokkos_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); } else { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target); } }(); // create model - const model_type model{ cmd_parser.model_filename }; + const model_type model{ comm, cmd_parser.model_filename }; // output parameter used to learn the model { const plssvm::parameter params = model.get_params(); - plssvm::detail::log(plssvm::verbosity_level::full, - "Parameter used to train the model:\n" - " kernel_type: {} -> {}\n", - params.kernel_type, - plssvm::kernel_function_type_to_math_string(params.kernel_type)); + 
plssvm::detail::log_untracked(plssvm::verbosity_level::full, + comm, + "Parameter used to train the model:\n" + " kernel_type: {} -> {}\n", + params.kernel_type, + plssvm::kernel_function_type_to_math_string(params.kernel_type)); switch (params.kernel_type) { case plssvm::kernel_function_type::linear: break; case plssvm::kernel_function_type::polynomial: - plssvm::detail::log(plssvm::verbosity_level::full, - " degree: {}\n" - " gamma: {}\n" - " coef0: {}\n", - params.degree, - plssvm::get_gamma_string(params.gamma), - params.coef0); + plssvm::detail::log_untracked(plssvm::verbosity_level::full, + comm, + " degree: {}\n" + " gamma: {}\n" + " coef0: {}\n", + params.degree, + plssvm::get_gamma_string(params.gamma), + params.coef0); break; case plssvm::kernel_function_type::rbf: case plssvm::kernel_function_type::laplacian: case plssvm::kernel_function_type::chi_squared: - plssvm::detail::log(plssvm::verbosity_level::full, " gamma: {}\n", plssvm::get_gamma_string(params.gamma)); + plssvm::detail::log_untracked(plssvm::verbosity_level::full, comm, " gamma: {}\n", plssvm::get_gamma_string(params.gamma)); break; case plssvm::kernel_function_type::sigmoid: - plssvm::detail::log(plssvm::verbosity_level::full, - " gamma: {}\n" - " coef0: {}\n", - plssvm::get_gamma_string(params.gamma), - params.coef0); + plssvm::detail::log_untracked(plssvm::verbosity_level::full, + comm, + " gamma: {}\n" + " coef0: {}\n", + plssvm::get_gamma_string(params.gamma), + params.coef0); break; } } @@ -147,11 +185,15 @@ int main(int argc, char *argv[]) { { const std::chrono::time_point write_start_time = std::chrono::steady_clock::now(); - fmt::ostream out = fmt::output_file(cmd_parser.predict_filename); - out.print("{}", fmt::join(predicted_labels, "\n")); + // only write predict file on the main MPI rank + if (comm.is_main_rank()) { + fmt::ostream out = fmt::output_file(cmd_parser.predict_filename); + out.print("{}", fmt::join(predicted_labels, "\n")); + } const std::chrono::time_point 
write_end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::timing, + comm, "Write {} predictions in {} to the file '{}'.\n", plssvm::detail::tracking::tracking_entry{ "predictions_write", "num_predictions", predicted_labels.size() }, plssvm::detail::tracking::tracking_entry{ "predictions_write", "time", std::chrono::duration_cast(write_end_time - write_start_time) }, @@ -167,9 +209,9 @@ int main(int argc, char *argv[]) { const plssvm::classification_report report{ correct_labels, predicted_labels }; // print complete report - plssvm::detail::log(plssvm::verbosity_level::full, "\n{}\n", report); + plssvm::detail::log_untracked(plssvm::verbosity_level::full, comm, "\n{}\n", report); // print only accuracy for LIBSVM conformity - plssvm::detail::log(plssvm::verbosity_level::libsvm, "{} (classification)\n", report.accuracy()); + plssvm::detail::log_untracked(plssvm::verbosity_level::libsvm, comm, "{} (classification)\n", report.accuracy()); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "achieved_accuracy", report.accuracy().achieved_accuracy })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "num_correct", report.accuracy().num_correct })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "num_total", report.accuracy().num_total })); @@ -177,9 +219,13 @@ int main(int argc, char *argv[]) { const plssvm::regression_report report{ correct_labels, predicted_labels }; // print complete report - plssvm::detail::log(plssvm::verbosity_level::full, "\n{}\n", report); + plssvm::detail::log_untracked(plssvm::verbosity_level::full, comm, "\n{}\n", report); // print only MSE and SCC for LIBSVM conformity - plssvm::detail::log(plssvm::verbosity_level::libsvm, "Mean squared error = {} (regression)\nSquared 
correlation coefficient = {} (regression)\n", report.loss().mean_squared_error, report.loss().squared_correlation_coefficient); + plssvm::detail::log_untracked(plssvm::verbosity_level::libsvm, + comm, + "Mean squared error = {} (regression)\nSquared correlation coefficient = {} (regression)\n", + report.loss().mean_squared_error, + report.loss().squared_correlation_coefficient); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "loss", "explained_variance_score", report.loss().explained_variance_score })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "loss", "mean_absolute_error", report.loss().mean_absolute_error })); @@ -192,7 +238,7 @@ int main(int argc, char *argv[]) { } } }; - std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); + std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(comm, cmd_parser)); // stop CPU hardware sampler and dump results if available #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -202,16 +248,37 @@ int main(int argc, char *argv[]) { const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::timing, + comm, "\nTotal runtime: {}\n", plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast(end_time - start_time) }); +#if defined(PLSSVM_HAS_MPI_ENABLED) + if (cmd_parser.performance_tracking_filename.empty()) { + // be sure that the output tracking results are correctly serialized + comm.serialize([&]() { + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); + }); + } else { + // update filename with MY MPI rank + std::filesystem::path path{ cmd_parser.performance_tracking_filename }; + path.replace_filename(fmt::format("{}.{}{}", path.stem(), comm.rank(), path.extension())); + // output to all files in 
parallel + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(path.string()); + } +#else + // if not compiled with MPI, simply output the tracking information PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); +#endif + } catch (const plssvm::cmd_parser_exit &e) { + // something inside the cmd parser went wrong + // -> don't call std::exit directly to gracefully tear down the environment + return e.exit_code(); } catch (const plssvm::exception &e) { - std::cerr << e.what_with_loc() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what_with_loc()) << std::endl; return EXIT_FAILURE; } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what()) << std::endl; return EXIT_FAILURE; } diff --git a/src/main_scale.cpp b/src/main_scale.cpp index 365f2eb2c..413ca4d72 100644 --- a/src/main_scale.cpp +++ b/src/main_scale.cpp @@ -11,7 +11,8 @@ #include "plssvm/core.hpp" #include "plssvm/detail/cmd/data_set_variants.hpp" // plssvm::detail::cmd::data_set_factory #include "plssvm/detail/cmd/parser_scale.hpp" // plssvm::detail::cmd::parser_scale -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED @@ -20,20 +21,33 @@ #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler #endif -#include // std::for_each -#include // std::chrono::{steady_clock, duration}, std::chrono_literals 
namespace -#include // std::size_t -#include // std::exit, EXIT_SUCCESS, EXIT_FAILURE -#include // std::exception -#include // std::mem_fn -#include // std::cerr, std::endl -#include // std::pair -#include // std::visit -#include // std::vector +#include "fmt/format.h" // fmt::format + +#include // std::chrono::{steady_clock, duration}, std::chrono_literals namespace +#include // std::size_t +#include // EXIT_SUCCESS, EXIT_FAILURE +#include // std::exception +#include // std::cerr, std::endl +#include // std::visit +#include // std::vector using namespace std::chrono_literals; int main(int argc, char *argv[]) { + // initialize MPI environment only via the plssvm::scope_guard (by explicitly specifying NO backend) + [[maybe_unused]] plssvm::environment::scope_guard mpi_guard{ {} }; + // create a PLSSVM communicator -> use MPI_COMM_WORLD for our executables + // if MPI is not supported, does nothing + const plssvm::mpi::communicator comm{}; + + // plssvm-scale ONLY supports one MPI rank + if (comm.size() > std::size_t{ 1 }) { + if (comm.is_main_rank()) { + std::cerr << fmt::format("Currently, plssvm-scale only supports a single MPI process, but {} where used!", comm.size()) << std::endl; + } + return EXIT_FAILURE; + } + try { const std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME(start_time); @@ -45,17 +59,19 @@ int main(int argc, char *argv[]) { #endif // create default parameters - const plssvm::detail::cmd::parser_scale cmd_parser{ argc, argv }; + const plssvm::detail::cmd::parser_scale cmd_parser{ comm, argc, argv }; // send warning if the build type is release and assertions are enabled if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) { - plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, - "WARNING: The build type is set to Release, but assertions are enabled. 
" - "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, + "WARNING: The build type is set to Release, but assertions are enabled. " + "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); } // output used parameter plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\ntask: scaling\n{}\n", plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser }); @@ -89,7 +105,7 @@ int main(int argc, char *argv[]) { data.scaling_factors()->get().save(cmd_parser.save_filename); } }; - std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); + std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(comm, cmd_parser)); // stop CPU hardware sampler and dump results if available #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -99,16 +115,21 @@ int main(int argc, char *argv[]) { const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::timing, + comm, "\nTotal runtime: {}\n", plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast(end_time - start_time) }); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); + } catch (const plssvm::cmd_parser_exit &e) { + // something inside the cmd parser went wrong + // -> don't call std::exit directly to gracefully tear down the environment + return e.exit_code(); } catch (const plssvm::exception &e) { - std::cerr << e.what_with_loc() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what_with_loc()) << std::endl; return EXIT_FAILURE; } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what()) << 
std::endl; return EXIT_FAILURE; } diff --git a/src/main_train.cpp b/src/main_train.cpp index 488507374..cf4893946 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -11,24 +11,30 @@ #include "plssvm/core.hpp" #include "plssvm/detail/cmd/data_set_variants.hpp" // plssvm::detail::cmd::data_set_factory #include "plssvm/detail/cmd/parser_train.hpp" // plssvm::detail::cmd::parser_train -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::is_executed_via_mpirun #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler #endif +#include "fmt/format.h" // fmt::format + #include // std::chrono::{time_point, steady_clock, duration_cast, milliseconds}, std::chrono_literals namespace #include // std::size_t #include // EXIT_SUCCESS, EXIT_FAILURE #include // std::exception +#include // std::filesystem::path #include // std::cerr, std::endl #include // std::unique_ptr, std::make_unique +#include // std::string #include // std::string_view -#include // std::remove_reference_t, std::is_same_v +#include // std::remove_reference_t #include // std::visit #include // std::vector @@ -71,6 +77,26 @@ template } int main(int argc, char *argv[]) { + // initialize MPI environment only via the plssvm::scope_guard (by explicitly specifying NO backend) + [[maybe_unused]] plssvm::environment::scope_guard mpi_guard{ {} }; + // 
create a PLSSVM communicator -> use MPI_COMM_WORLD for our executables + // if MPI is not supported, does nothing + plssvm::mpi::communicator comm{}; + +#if defined(PLSSVM_HAS_MPI_ENABLED) + plssvm::detail::log_untracked(plssvm::verbosity_level::full, + comm, + "Using {} MPI rank(s) for our C-SVM.\n", + comm.size()); +#else + if (plssvm::mpi::is_executed_via_mpirun()) { + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, + "WARNING: PLSSVM was built without MPI support, but plssvm-train was executed via mpirun! " + "As a result, each MPI process will run the same code.\n"); + } +#endif + // create std::unique_ptr containing a plssvm::scope_guard // -> used to automatically handle necessary environment teardown operations std::unique_ptr environment_guard{}; @@ -86,21 +112,31 @@ int main(int argc, char *argv[]) { #endif // parse SVM parameter from command line - plssvm::detail::cmd::parser_train cmd_parser{ argc, argv }; + const plssvm::detail::cmd::parser_train cmd_parser{ comm, argc, argv }; + + // add MPI related tracking entries + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "mpi", "", comm })); // send warning if the build type is release and assertions are enabled if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) { - plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, - "WARNING: The build type is set to Release, but assertions are enabled. " - "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); + plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, + "WARNING: The build type is set to Release, but assertions are enabled. 
" + "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); } // output used parameter plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\ntask: training ({})\n{}\n\n\n", plssvm::svm_type_to_task_name(cmd_parser.svm), plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser }); + // update the load balancing weights if they were provided + if (!cmd_parser.mpi_load_balancing_weights.empty()) { + comm.set_load_balancing_weights(cmd_parser.mpi_load_balancing_weights); + } + // create data set const auto data_set_visitor = [&](auto &&data) { using label_type = typename std::remove_reference_t::label_type; @@ -127,11 +163,11 @@ int main(int argc, char *argv[]) { // create SVM const std::unique_ptr svm = [&]() { if (use_sycl_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type); } else if (use_kokkos_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, cmd_parser.csvm_params, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); } else { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, cmd_parser.csvm_params); } }(); @@ -149,7 +185,7 @@ int main(int argc, char *argv[]) { // save model to file model.save(cmd_parser.model_filename); }; - std::visit(data_set_visitor, 
plssvm::detail::cmd::data_set_factory(cmd_parser)); + std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(comm, cmd_parser)); // stop CPU hardware sampler and dump results if available #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -157,18 +193,42 @@ int main(int argc, char *argv[]) { PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY(sampler); #endif + // wait until all MPI processes reach this point + comm.barrier(); + const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\nTotal runtime: {}\n", plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast(end_time - start_time) }); +#if defined(PLSSVM_HAS_MPI_ENABLED) + if (cmd_parser.performance_tracking_filename.empty()) { + // be sure that the output tracking results are correctly serialized + comm.serialize([&]() { + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); + }); + } else { + // update filename with MY MPI rank + std::filesystem::path path{ cmd_parser.performance_tracking_filename }; + path.replace_filename(fmt::format("{}.{}{}", path.stem(), comm.rank(), path.extension())); + // output to all files in parallel + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(path.string()); + } +#else + // if not compiled with MPI, simply output the tracking information PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); +#endif + } catch (const plssvm::cmd_parser_exit &e) { + // something inside the cmd parser went wrong + // -> don't call std::exit directly to gracefully tear down the environment + return e.exit_code(); } catch (const plssvm::exception &e) { - std::cerr << e.what_with_loc() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what_with_loc()) << std::endl; return EXIT_FAILURE; } catch (const std::exception &e) { - std::cerr << 
e.what() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what()) << std::endl; return EXIT_FAILURE; } diff --git a/src/plssvm/backends/CUDA/csvm.cu b/src/plssvm/backends/CUDA/csvm.cu index 54e3d57b2..a783d63a2 100644 --- a/src/plssvm/backends/CUDA/csvm.cu +++ b/src/plssvm/backends/CUDA/csvm.cu @@ -20,12 +20,14 @@ #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -43,6 +45,7 @@ #include // std::cout, std::endl #include // std::iota #include // std::string +#include // std::move #include // std::get #include // std:vector @@ -58,12 +61,6 @@ csvm::csvm(const target_platform target) { #endif } - plssvm::detail::log(verbosity_level::full, - "\nUsing CUDA ({}) as backend.\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "cuda_runtime_version", 
detail::get_runtime_version() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::cuda })); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::gpu_nvidia })); - // update the target platform target_ = plssvm::target_platform::gpu_nvidia; @@ -76,26 +73,50 @@ csvm::csvm(const target_platform target) { throw backend_exception{ "CUDA backend selected but no CUDA capable devices were found!" }; } - // print found CUDA devices - plssvm::detail::log(verbosity_level::full, - "Found {} CUDA device(s):\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }); - std::vector device_names; + std::vector device_names{}; device_names.reserve(devices_.size()); - for (const queue_type &device : devices_) { - cudaDeviceProp prop{}; - PLSSVM_CUDA_ERROR_CHECK(cudaGetDeviceProperties(&prop, device)) - plssvm::detail::log(verbosity_level::full, - " [{}, {}, {}.{}]\n", - device, - prop.name, - prop.major, - prop.minor); - device_names.emplace_back(prop.name); + + if (comm_.size() > 1) { + // use MPI rank specific command line output + for (const queue_type &device : devices_) { + cudaDeviceProp prop{}; + PLSSVM_CUDA_ERROR_CHECK(cudaGetDeviceProperties(&prop, device)) + device_names.emplace_back(prop.name); + } + + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::cuda, target_, device_names); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing CUDA ({}) as backend.\n" + "Found {} CUDA device(s):\n", + detail::get_runtime_version(), + devices_.size()); + + for (const queue_type &device : devices_) { + cudaDeviceProp prop{}; + PLSSVM_CUDA_ERROR_CHECK(cudaGetDeviceProperties(&prop, device)) + 
plssvm::detail::log_untracked(verbosity_level::full, + comm_, + " [{}, {}, {}.{}]\n", + device, + prop.name, + prop.major, + prop.minor); + device_names.emplace_back(prop.name); + } } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "cuda_runtime_version", detail::get_runtime_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::cuda })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); } csvm::~csvm() { diff --git a/src/plssvm/backends/HIP/CMakeLists.txt b/src/plssvm/backends/HIP/CMakeLists.txt index d7859fea3..4c6562c06 100644 --- a/src/plssvm/backends/HIP/CMakeLists.txt +++ b/src/plssvm/backends/HIP/CMakeLists.txt @@ -9,54 +9,81 @@ list(APPEND CMAKE_MESSAGE_INDENT "HIP: ") # check if HIP can be enabled message(CHECK_START "Checking for HIP backend") -# if both NVIDIA and AMD targets are available, the HIP backend is disabled -if (DEFINED PLSSVM_AMD_TARGET_ARCHS AND DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - message(SEND_ERROR "Found AMD and NVIDIA targets, but only one of them are supported for the HIP backend!") - message(CHECK_FAIL "skipped") - return() -endif () - -# set used HIP target based on the provided PLSSVM target archs -if (DEFINED PLSSVM_AMD_TARGET_ARCHS) - 
set_local_and_parent(PLSSVM_HIP_BACKEND_GPU_RUNTIME "HIP") -elseif (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - set_local_and_parent(PLSSVM_HIP_BACKEND_GPU_RUNTIME "CUDA") -else () - if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") - message(SEND_ERROR "Found no AMD or NVIDIA targets for the requested HIP backend!") - else () - message(STATUS "Found HIP backend, but no \"amd\" targets were specified!") - endif () - message(CHECK_FAIL "skipped") - return() -endif () - # check if HIP_PLATFORM is provided as environment variable if (DEFINED ENV{HIP_PLATFORM}) - # check if the environment variable is correctly defined - if ((DEFINED PLSSVM_AMD_TARGET_ARCHS AND NOT $ENV{HIP_PLATFORM} MATCHES "amd") OR (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS AND NOT $ENV{HIP_PLATFORM} MATCHES - "nvidia") - ) + # the HIP_PLATFORM environment variable is defined + if ("$ENV{HIP_PLATFORM}" STREQUAL "amd") + # we want to target AMD GPUs according to the provided HIP_PLATFORM + if (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # found AMD target + set_local_and_parent(PLSSVM_HIP_BACKEND_GPU_RUNTIME "HIP") + else () + # no AMD targets provided + if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") + message(SEND_ERROR "The \"HIP_PLATFORM\" is \"amd\" but no \"amd\" target was provided for the requested HIP backend!") + else () + message(STATUS "The \"HIP_PLATFORM\" is \"amd\" but no \"amd\" target was provided for the HIP backend!") + endif () + message(CHECK_FAIL "skipped") + return() + endif () + elseif ("$ENV{HIP_PLATFORM}" STREQUAL "nvidia") + # we want to target NVIDIA GPUs according to the provided HIP_PLATFORM + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # found NVIDIA target + set_local_and_parent(PLSSVM_HIP_BACKEND_GPU_RUNTIME "CUDA") + else () + # no NVIDIA targets provided + if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") + message(SEND_ERROR "The \"HIP_PLATFORM\" is \"nvidia\" but no \"nvidia\" target was provided for the requested HIP backend!") + else () + message(STATUS "The \"HIP_PLATFORM\" is \"nvidia\" but no 
\"nvidia\" target was provided for the HIP backend!") + endif () + message(CHECK_FAIL "skipped") + return() + endif () + else () + # invalid HIP_PLATFORM if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") - message(SEND_ERROR "Found invalid \"HIP_PLATFORM\" value ($ENV{HIP_PLATFORM}) for the requested HIP backend!") + message(SEND_ERROR "Found invalid \"HIP_PLATFORM\" value \"$ENV{HIP_PLATFORM}\" for the requested HIP backend!") else () - message(STATUS "Found invalid \"HIP_PLATFORM\" value ($ENV{HIP_PLATFORM}) for the HIP backend!") + message(STATUS "Found invalid \"HIP_PLATFORM\" value \"$ENV{HIP_PLATFORM}\" for the HIP backend!") endif () message(CHECK_FAIL "skipped") return() - else () - message(STATUS "Using \"HIP_PLATFORM=$ENV{HIP_PLATFORM}\".") endif () else () - # environment variable is not defined -> we set it - if (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # environment variable not defined -> try inferring it using the provided PLSSVM target platforms + if (DEFINED PLSSVM_AMD_TARGET_ARCHS AND DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # we try to target NVIDIA and AMD GPUs + if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") + message(SEND_ERROR "Found \"amd\" and \"nvidia\" targets for the requested HIP backend, but only one of them are supported for the HIP backend!") + else () + message(STATUS "Found \"amd\" and \"nvidia\" targets for the HIP backend, but only one of them are supported for the HIP backend.") + endif () + message(CHECK_FAIL "skipped") + return() + elseif (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # we target AMD GPUs but NO NVIDIA GPUs -> infer HIP_PLATFORM as amd set(ENV{HIP_PLATFORM} "amd") set(HIP_PLATFORM "amd" CACHE STRING "set the HIP_PLATFORM to AMD" FORCE) set(CMAKE_HIP_PLATFORM "amd" CACHE STRING "set the HIP_PLATFORM to AMD" FORCE) + set_local_and_parent(PLSSVM_HIP_BACKEND_GPU_RUNTIME "HIP") elseif (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # we target NVIDIA GPUs but NO AMD GPUs -> infer HIP_PLATFORM as nvidia set(ENV{HIP_PLATFORM} "nvidia") set(HIP_PLATFORM 
"nvidia" CACHE STRING "set the HIP_PLATFORM to NVIDIA" FORCE) set(CMAKE_HIP_PLATFORM "nvidia" CACHE STRING "set the HIP_PLATFORM to NVIDIA" FORCE) + set_local_and_parent(PLSSVM_HIP_BACKEND_GPU_RUNTIME "CUDA") + else () + # no AMD or NVIDIA GPUs requested -> can't find HIP + if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") + message(SEND_ERROR "Found no \"amd\" or \"nvidia\" targets for the requested HIP backend!") + else () + message(STATUS "Found no \"amd\" or \"nvidia\" targets for the HIP backend.") + endif () + message(CHECK_FAIL "skipped") + return() endif () message(STATUS "Environment variable \"HIP_PLATFORM\" is not defined. Setting it to \"$ENV{HIP_PLATFORM}\"") endif () @@ -73,7 +100,8 @@ if (NOT CMAKE_${PLSSVM_HIP_BACKEND_GPU_RUNTIME}_COMPILER) endif () enable_language(${PLSSVM_HIP_BACKEND_GPU_RUNTIME}) -find_package(HIP QUIET) +# always necessary (also with HIP_PLATFORM nvidia) in order to be able to link against hip:: targets +find_package(HIP) if (NOT HIP_FOUND) if (PLSSVM_ENABLE_HIP_BACKEND MATCHES "ON") message(SEND_ERROR "Cannot find requested backend: HIP!") diff --git a/src/plssvm/backends/HIP/csvm.hip b/src/plssvm/backends/HIP/csvm.hip index 87030c623..c17f4db59 100644 --- a/src/plssvm/backends/HIP/csvm.hip +++ b/src/plssvm/backends/HIP/csvm.hip @@ -20,12 +20,14 @@ #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception 
#include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -66,14 +68,8 @@ csvm::csvm(const target_platform target) { } #endif - plssvm::detail::log(verbosity_level::full, - "\nUsing HIP ({}; runtime: {}) as backend.\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "hip_runtime_version", detail::get_runtime_version() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "hip_runtime", detail::get_runtime() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hip })); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::gpu_amd })); - // update the target platform + target_ = target; if (target_ == target_platform::automatic) { #if defined(PLSSVM_HIP_BACKEND_USE_HIP_RUNTIME) target_ = plssvm::target_platform::gpu_amd; @@ -91,26 +87,52 @@ csvm::csvm(const target_platform target) { throw backend_exception{ "HIP backend selected but no HIP capable devices were found!" 
}; } - // print found HIP devices - plssvm::detail::log(verbosity_level::full, - "Found {} HIP device(s):\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }); - std::vector device_names; + std::vector device_names{}; device_names.reserve(devices_.size()); - for (const queue_type &device : devices_) { - hipDeviceProp_t prop{}; - PLSSVM_HIP_ERROR_CHECK(hipGetDeviceProperties(&prop, device)) - plssvm::detail::log(verbosity_level::full, - " [{}, {}, {}.{}]\n", - device, - prop.name, - prop.major, - prop.minor); - device_names.emplace_back(prop.name); + + if (comm_.size() > 1) { + // use MPI rank specific command line output + for (const queue_type &device : devices_) { + hipDeviceProp_t prop{}; + PLSSVM_HIP_ERROR_CHECK(hipGetDeviceProperties(&prop, device)) + device_names.emplace_back(prop.name); + } + + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::hip, target_, device_names); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing HIP ({}; runtime: {}) as backend.\n" + "Found {} HIP device(s):\n", + detail::get_runtime_version(), + detail::get_runtime(), + devices_.size()); + + for (const queue_type &device : devices_) { + hipDeviceProp_t prop{}; + PLSSVM_HIP_ERROR_CHECK(hipGetDeviceProperties(&prop, device)) + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + " [{}, {}, {}.{}]\n", + device, + prop.name, + prop.major, + prop.minor); + device_names.emplace_back(prop.name); + } } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "hip_runtime_version", detail::get_runtime_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "hip_runtime", 
detail::get_runtime() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hip })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); } csvm::~csvm() { diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp index bce7ff88d..23f68eec8 100644 --- a/src/plssvm/backends/HPX/csvm.cpp +++ b/src/plssvm/backends/HPX/csvm.cpp @@ -16,18 +16,23 @@ #include "plssvm/backends/HPX/kernel/predict_kernel.hpp" // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} +#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::{move_only_any, move_only_any_cast} #include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // 
plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "hpx/future.hpp" // hpx::future, hpx::async + #include // std::size_t #include // std::tuple, std::make_tuple #include // std::move @@ -45,15 +50,28 @@ csvm::csvm(const target_platform target) { throw backend_exception{ "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" }; #endif - plssvm::detail::log(verbosity_level::full, - "\nUsing HPX ({}) as backend with {} thread(s).\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "hpx_version", detail::get_hpx_version() }, - plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hpx })); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::cpu })); - // update the target platform target_ = plssvm::target_platform::cpu; + + if (comm_.size() > 1) { + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::hpx, target_); + } else { + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing HPX ({}) as backend with {} thread(s).\n", + detail::get_hpx_version(), + detail::get_num_threads()); + } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "hpx_version", 
detail::get_hpx_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hpx })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); } csvm::~csvm() = default; @@ -77,48 +95,61 @@ std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1); + // update the data distribution: only the upper triangular kernel matrix is used + // note: account for the dimensional reduction + data_distribution_ = std::make_unique<::plssvm::detail::triangular_data_distribution>(comm_, A.num_rows() - 1, this->num_available_devices()); + // get the triangular data distribution + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices()); + const real_type cost = real_type{ 1.0 } / params.cost; + ::hpx::future wait = ::hpx::async([&]() { - const real_type cost = real_type{ 1.0 } / params.cost; - - switch (solver) { - case solver_type::automatic: - // unreachable - break; - case solver_type::cg_explicit: - { - const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() }; - 
std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix - switch (params.kernel_type) { - case kernel_function_type::linear: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost); - break; - case kernel_function_type::polynomial: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - } + if (dist.place_specific_num_rows(0) > std::size_t{ 0 }) { + switch (solver) { + case solver_type::automatic: + // unreachable + break; + case solver_type::cg_explicit: + { + // calculate the number of data points this device is responsible for + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); - kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) }; - } - break; - case solver_type::cg_implicit: - { - // simply return data since in implicit we don't assembly the kernel matrix here! 
- kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) }; - } - break; + // get the offset of the data points this device is responsible for + const std::size_t row_offset = dist.place_row_offset(0); + + std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix + switch (params.kernel_type) { + case kernel_function_type::linear: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost); + break; + case kernel_function_type::polynomial: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + } + + kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) }; + } + break; + case solver_type::cg_implicit: + { + // simply return data since in implicit we don't assembly the kernel matrix here! 
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) }; + } + break; + } } }); // wait until operation is completed @@ -136,51 +167,80 @@ void csvm::blas_level_3(const solver_type solver, const real_type alpha, const s PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape()); PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding()); + using namespace operators; + + // get the triangular data distribution + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + ::hpx::future wait = ::hpx::async([&]() { - switch (solver) { - case solver_type::automatic: - // unreachable - break; - case solver_type::cg_explicit: - { - const std::size_t num_rhs = B.shape().x; - const std::size_t num_rows = B.shape().y; - - const auto &explicit_A = ::plssvm::detail::move_only_any_cast &>(A.front()); - PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!"); - - detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C); - } - break; - case solver_type::cg_implicit: - { - const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast, parameter, std::vector, real_type> &>(A.front()); - PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!"); - PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); - const real_type cost = real_type{ 1.0 } / params.cost; - - switch (params.kernel_type) { - case kernel_function_type::linear: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C); - break; - case kernel_function_type::polynomial: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get(params.gamma), params.coef0); - break; - case 
kernel_function_type::rbf: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; + // check whether the current device is responsible for at least one data point! + if (dist.place_specific_num_rows(0) > std::size_t{ 0 }) { + if (!comm_.is_main_rank()) { + // MPI rank 0 always touches all values in C -> other MPI ranks do not need C + C *= real_type{ 0.0 }; + } + + // calculate the number of data points this device is responsible for + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + // get the offset of the data points this device is responsible for + const std::size_t row_offset = dist.place_row_offset(0); + + const std::size_t num_rhs = B.shape().x; + const std::size_t num_rows = B.shape().y; + + switch (solver) { + case solver_type::automatic: + // unreachable + break; + case solver_type::cg_explicit: + { + const auto &explicit_A = ::plssvm::detail::move_only_any_cast &>(A.front()); + PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!"); + + detail::device_kernel_symm(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, explicit_A, B, beta, C); + + const std::size_t num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + if (num_mirror_rows > std::size_t{ 0 }) { + detail::device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, explicit_A, B, beta, C); + } } - } - break; + break; + case 
solver_type::cg_implicit: + { + const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast, parameter, std::vector, real_type> &>(A.front()); + PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!"); + PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); + const real_type cost = real_type{ 1.0 } / params.cost; + + if (comm_.is_main_rank()) { + // we do not perform the beta scale in C in the cg_implicit device kernel + // -> calculate it using a separate kernel (always on device 0 and MPI rank 0!) + C *= beta; + } + + switch (params.kernel_type) { + case kernel_function_type::linear: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C); + break; + case kernel_function_type::polynomial: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + } + } + break; + } } }); // wait until operation is completed @@ -212,6 +272,7 @@ aos_matrix csvm::predict_values(const parameter ¶ms, // defined sizes const std::size_t num_classes = alpha.num_rows(); + const std::size_t 
num_sv = support_vectors.num_rows(); const std::size_t num_predict_points = predict_points.num_rows(); const std::size_t num_features = predict_points.num_cols(); @@ -222,33 +283,51 @@ aos_matrix csvm::predict_values(const parameter ¶ms, if (params.kernel_type == kernel_function_type::linear) { // special optimization for the linear kernel function if (w.empty()) { + // update the data distribution to account for the support vectors + data_distribution_ = std::make_unique<::plssvm::detail::rectangular_data_distribution>(comm_, num_sv, this->num_available_devices()); + // fill w vector w = soa_matrix{ plssvm::shape{ num_classes, num_features }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } }; - detail::device_kernel_w_linear(w, alpha, support_vectors); + + if (data_distribution_->place_specific_num_rows(0) > std::size_t{ 0 }) { + const std::size_t device_specific_num_sv = data_distribution_->place_specific_num_rows(0); + const std::size_t sv_offset = data_distribution_->place_row_offset(0); + + detail::device_kernel_w_linear(w, alpha, support_vectors, device_specific_num_sv, sv_offset); + } + + // reduce w on all MPI ranks + comm_.allreduce_inplace(w); } } - // call the predict kernels - switch (params.kernel_type) { - case kernel_function_type::linear: - // predict the values using the w vector - detail::device_kernel_predict_linear(out, w, rho, predict_points); - break; - case kernel_function_type::polynomial: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_predict(out, alpha, rho, 
support_vectors, predict_points, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; + data_distribution_ = std::make_unique<::plssvm::detail::rectangular_data_distribution>(comm_, num_predict_points, this->num_available_devices()); + const std::size_t device_specific_num_predict_points = data_distribution_->place_specific_num_rows(0); + const std::size_t row_offset = data_distribution_->place_row_offset(0); + + if (data_distribution_->place_specific_num_rows(0) > std::size_t{ 0 }) { + // call the predict kernels + switch (params.kernel_type) { + case kernel_function_type::linear: + // predict the values using the w vector + detail::device_kernel_predict_linear(out, w, rho, predict_points, device_specific_num_predict_points, row_offset); + break; + case kernel_function_type::polynomial: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + } } }); // wait until operation is 
completed diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp index c71c43507..9cd2265fc 100644 --- a/src/plssvm/backends/HPX/detail/utility.cpp +++ b/src/plssvm/backends/HPX/detail/utility.cpp @@ -9,9 +9,10 @@ #include "plssvm/backends/HPX/detail/utility.hpp" -#include // ::hpx::get_num_worker_threads -#include // ::hpx::full_version_as_string -#include // std::string +#include "hpx/runtime_distributed.hpp" // ::hpx::get_num_worker_threads +#include "hpx/version.hpp" // ::hpx::full_version_as_string + +#include // std::string namespace plssvm::hpx::detail { diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index 8b72f9af3..676d9f82b 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -40,77 +40,31 @@ if (Kokkos_ENABLE_SYCL) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") message(FATAL_ERROR "For Kokkos::SYCL to work, the compiler must be IntelLLVM, but is ${CMAKE_CXX_COMPILER}!") endif () - - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -sycl-std=2020 -fsycl) - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl) - # set icpx specific compiler flags based on the provided PLSSVM_TARGET_PLATFORMS - set(PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "") - # cpu targets - if (DEFINED PLSSVM_CPU_TARGET_ARCHS) - # assemble -fsycl-targets - list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "spir64_x86_64") - endif () - # nvidia targets - if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - # assemble -fsycl-targets - list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "nvptx64-nvidia-cuda") - endif () - # amd targets - if (DEFINED PLSSVM_AMD_TARGET_ARCHS) - # assemble -fsycl-targets - list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "amdgcn-amd-amdhsa") - # add target specific flags for AOT -> must always be specified von amd targets - if (NOT PLSSVM_NUM_AMD_TARGET_ARCHS EQUAL 1) - message(SEND_ERROR "IntelLLVM 
currently only supports a single AMD architecture specification but ${PLSSVM_NUM_AMD_TARGET_ARCHS} were provided!") - endif () - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) - endif () - # set -fsycl-targets - list(JOIN PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "," PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING) - if (NOT PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS STREQUAL "") - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) + if (PLSSVM_ENABLE_FAST_MATH) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -ffast-math) + else () + # icpx does compile with fast-math by default -> explicitly disable it if not requested otherwise! + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fno-fast-math) endif () - # add option for IntelLLVM Ahead-of-Time (AOT) compilation - option(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT "Enables Ahead-of-Time compilation for the Kokkos::SYCL execution space using IntelLLVM." 
ON) - if (PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) - message(STATUS "Enabled Ahead-of-Time (AOT) compilation for the Kokkos::SYCL execution space using IntelLLVM.") - target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) - target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) - # set AOT compiler flags - if (DEFINED PLSSVM_CPU_TARGET_ARCHS) - # add CPU target specific flags for AOT - if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") - endif () - endif () - if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - # add NVIDIA GPU target specific flags for AOT - if (NOT PLSSVM_NUM_NVIDIA_TARGET_ARCHS EQUAL 1) - message( - SEND_ERROR - "IntelLLVM currently only supports a single NVIDIA architecture specification for AOT but ${PLSSVM_NUM_NVIDIA_TARGET_ARCHS} were provided!" 
- ) - endif () - target_compile_options( - ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS} - ) - target_link_options( - ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS} - ) - endif () - if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl-targets=spir64_gen) - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl-targets=spir64_gen) - # add Intel GPU target specific flags for AOT - list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," PLSSVM_INTEL_TARGET_ARCHS_STRING) - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") - endif () + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -sycl-std=2020 -fsycl) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl) + + # ~~~ + # assemble the icpx specific compiler flags and add them to the Kokkos backend target + # -> Kokkos sets the values for NVIDIA and AMD correctly, but does NOT set AOT for Intel GPUs! + # ~~~ + include(${PROJECT_SOURCE_DIR}/cmake/assemble_icpx_sycl_target_flags.cmake) + assemble_icpx_sycl_target_flags(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE) + + # add option to enable multi-GPU support with Kokkos::SYCL -> currently broken, but maybe possible in a feature release + option(PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU + "Enables multi-GPU support for the Kokkos::SYCL execution space (which is broken on the Kokkos side as of Kokkos 4.6.00)." 
OFF + ) + if (PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU) + message(STATUS "Enabled multi-GPU support for the Kokkos::SYCL execution space.") + target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU) endif () endif () diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index a6e773719..0c5e570c6 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -8,6 +8,7 @@ #include "plssvm/backends/Kokkos/csvm.hpp" +#include "plssvm/backend_types.hpp" // plssvm::backend_type #include "plssvm/backends/execution_range.hpp" // plssvm::detail::{execution_range, dim_type} #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_*, PLSSVM_KOKKOS_BACKEND_INVOKE_IF_ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr @@ -22,13 +23,16 @@ #include "plssvm/constants.hpp" // plssvm::THREAD_BLOCK_SIZE, plssvm::INTERNAL_BLOCK_SIZE, plssvm::FEATURE_BLOCK_SIZE #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable} #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // 
plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -124,10 +128,13 @@ void csvm::init(const target_platform target) { } } - // output what we use as automatic Kokkos execution space - plssvm::detail::log(verbosity_level::full, - "\nUsing {} as automatic Kokkos::ExecutionSpace.", - space_); + if (comm_.size() == 1) { + // output what we use as automatic Kokkos execution space + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing {} as automatic Kokkos::ExecutionSpace.", + space_); + } } else { // execution space explicitly provided and potentially automatically determine the target platform if (target == target_platform::automatic) { @@ -156,6 +163,9 @@ void csvm::init(const target_platform target) { } } + // get all available devices wrt the requested target platform + devices_ = detail::get_device_list(space_, target_, comm_); + // At this point, space_ may NEVER be execution_space::automatic! 
PLSSVM_ASSERT(space_ != execution_space::automatic, "At this point, the Kokkos execution space must be determined and must NOT be automatic!"); PLSSVM_ASSERT(target_ != target_platform::automatic, "At this point, the target platform must be determined and must NOT be automatic!"); @@ -165,45 +175,60 @@ void csvm::init(const target_platform target) { throw backend_exception{ fmt::format("The Kokkos execution space {} is currently not supported!", space_) }; } - plssvm::detail::log(verbosity_level::full, - "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace {}.\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_default_execution_space", space_ }); - - // output automatic target platform information - if (target == target_platform::automatic) { - plssvm::detail::log(verbosity_level::full, - "Using {} as automatic target platform.\n", - target_); - } - - // get all available devices wrt the requested target platform - devices_ = detail::get_device_list(space_, target_); - // throw exception if no devices in the current execution space could be found if (devices_.empty()) { throw backend_exception{ fmt::format("No devices found for the Kokkos execution space {} with the target platform {}!", space_, target_) }; } - // print found Kokkos devices - plssvm::detail::log(verbosity_level::full, - "Found {} Kokkos device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); - std::vector device_names{}; device_names.reserve(devices_.size()); - for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { - const std::string device_name = detail::get_device_name(devices_[device]); - plssvm::detail::log(verbosity_level::full, - " [{}, {}]\n", - device, - 
device_name); - device_names.emplace_back(device_name); + + if (comm_.size() > 1) { + // use MPI rank specific command line output + for (const queue_type &device : devices_) { + device_names.emplace_back(detail::get_device_name(device)); + } + + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::kokkos, target_, device_names, fmt::format("{}", space_)); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace {}.\n", + detail::get_kokkos_version(), + space_); + if (target == target_platform::automatic) { + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Using {} as automatic target platform.\n", + target_); + } + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Found {} Kokkos device(s) for the target platform {}:\n", + devices_.size(), + target_); + + for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { + const std::string device_name = detail::get_device_name(devices_[device]); + plssvm::detail::log_untracked(verbosity_level::full, + " [{}, {}]\n", + device, + device_name); + device_names.emplace_back(device_name); + } } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::kokkos })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_execution_space", space_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", 
"target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); } csvm::~csvm() { diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index 35dd6c2e9..f15dfca9e 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -12,9 +12,11 @@ #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case #include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -38,7 +40,7 @@ namespace plssvm::kokkos::detail { -std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target) { +std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target, [[maybe_unused]] const mpi::communicator &comm) { PLSSVM_ASSERT(space != execution_space::automatic, "The automatic execution_space may not be provided to this 
function!"); std::vector devices{}; @@ -79,9 +81,7 @@ std::vector get_device_list(const execution_space space, [[maybe for (const auto &platform : ::sycl::platform::get_platforms()) { for (const auto &device : platform.get_devices()) { // Note: Kokkos is IntelLLVM/DPC++/icpx only - if (device.is_cpu() && target == target_platform::cpu) { - devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); - } else if (device.is_gpu()) { + if (device.is_gpu()) { // the current device is a GPU // get vendor string and convert it to all lower case const std::string vendor_string = ::plssvm::detail::as_lower_case(device.get_info<::sycl::info::device::vendor>()); @@ -99,6 +99,17 @@ std::vector get_device_list(const execution_space space, [[maybe } } } + +#if !defined(PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU) + if (devices.size() > 1) { + ::plssvm::detail::log_untracked(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + "\nFound {} devices on MPI rank {} for the Kokkos::SYCL execution space, but multi-GPU support is disabled. Using only device 1.", + devices.size(), + comm.rank()); + // only use the first GPU found (which most likely is the default device) + devices.resize(1); + } +#endif })); break; case execution_space::hpx: @@ -111,6 +122,7 @@ std::vector get_device_list(const execution_space space, [[maybe // Note: if OpenMP should be used as device must be set in order for it to work! 
if (omp_get_nested() == 0) { ::plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: In order for Kokkos::OpenMP to work properly, we have to set \"omp_set_nested(1)\"!\n"); // enable OMP_NESTED support // Note: function is officially deprecated but still necessary for Kokkos::OpenMP to work properly diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index 5dc3f8cda..e8b8ada6d 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -63,9 +63,7 @@ std::map> available_target_platfor for (const auto &platform : ::sycl::platform::get_platforms()) { for (const auto &device : platform.get_devices()) { // Note: Kokkos is Intel LLVM/DPC++/icpx only - if (device.is_cpu()) { - targets.insert(target_platform::cpu); - } else if (device.is_gpu()) { + if (device.is_gpu()) { // the current device is a GPU // get vendor string and convert it to all lower case const std::string vendor_string = ::plssvm::detail::as_lower_case(device.get_info<::sycl::info::device::vendor>()); diff --git a/src/plssvm/backends/OpenCL/CMakeLists.txt b/src/plssvm/backends/OpenCL/CMakeLists.txt index c0b59d124..c79869c05 100644 --- a/src/plssvm/backends/OpenCL/CMakeLists.txt +++ b/src/plssvm/backends/OpenCL/CMakeLists.txt @@ -26,6 +26,7 @@ set(PLSSVM_OPENCL_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/context.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/error_code.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/jit_info.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/kernel.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp diff --git a/src/plssvm/backends/OpenCL/csvm.cpp b/src/plssvm/backends/OpenCL/csvm.cpp index aa1fe39b2..671a911fa 100644 --- a/src/plssvm/backends/OpenCL/csvm.cpp +++ b/src/plssvm/backends/OpenCL/csvm.cpp @@ -13,19 +13,23 @@ #include 
"plssvm/backends/OpenCL/detail/command_queue.hpp" // plssvm::opencl::detail::command_queue #include "plssvm/backends/OpenCL/detail/context.hpp" // plssvm::opencl::detail::context #include "plssvm/backends/OpenCL/detail/device_ptr.hpp" // plssvm::opencl::detail::device_ptr +#include "plssvm/backends/OpenCL/detail/jit_info.hpp" // plssvm::opencl::detail::create_jit_report #include "plssvm/backends/OpenCL/detail/kernel.hpp" // plssvm::opencl::detail::{compute_kernel_name, kernel} #include "plssvm/backends/OpenCL/detail/utility.hpp" // PLSSVM_OPENCL_ERROR_CHECK, plssvm::opencl::detail::{get_contexts, create_command_queues, run_kernel, kernel_type_to_function_name, device_synchronize, get_opencl_target_version, get_driver_version} #include "plssvm/backends/OpenCL/exceptions.hpp" // plssvm::opencl::backend_exception #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry #include "plssvm/detail/utility.hpp" // plssvm::detail::contains #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include 
"plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -80,9 +84,6 @@ csvm::csvm(const target_platform target) { break; } - // get kernel type from base class - const kernel_function_type kernel = base_type::get_params().kernel_type; - // get all available OpenCL contexts for the current target including devices with respect to the requested target platform std::tie(contexts_, target_) = detail::get_contexts(target); @@ -101,53 +102,72 @@ csvm::csvm(const target_platform target) { throw backend_exception{ fmt::format("OpenCL backend selected but no devices for the target {} were found!", target) }; } - // print OpenCL info - plssvm::detail::log(verbosity_level::full, - "\nUsing OpenCL (target version: {}) as backend.\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "opencl_target_version", detail::get_opencl_target_version() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "opencl_library", std::string{ PLSSVM_OPENCL_LIBRARY } })); - if (target == target_platform::automatic) { - plssvm::detail::log(verbosity_level::full, - "Using {} as automatic target platform.\n", - target_); - } - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::opencl })); + // create command_queues and JIT compile OpenCL kernels; compile all kernels for float and double + detail::jit_info info{}; + std::tie(devices_, info) = detail::create_command_queues(comm_, contexts_, params_.kernel_type); - // create command_queues and JIT compile OpenCL kernels - const auto jit_start_time = std::chrono::steady_clock::now(); - - // get kernel names - const std::vector> kernel_names = detail::kernel_type_to_function_names(); - // compile all kernels for float and double - devices_ = 
detail::create_command_queues(contexts_, kernel, kernel_names); - - const auto jit_end_time = std::chrono::steady_clock::now(); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\nOpenCL kernel JIT compilation done in {}.\n", - plssvm::detail::tracking::tracking_entry{ "backend", "jit_compilation_time", std::chrono::duration_cast(jit_end_time - jit_start_time) }); - - // print found OpenCL devices - plssvm::detail::log(verbosity_level::full, - "Found {} OpenCL device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); - std::vector device_names; + std::vector device_names{}; device_names.reserve(devices_.size()); - for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { - const std::string device_name = detail::get_device_name(devices_[device]); - plssvm::detail::log(verbosity_level::full, - " [{}, {}]\n", - device, - device_name); - device_names.emplace_back(device_name); - - // get the target platform's driver version - const std::string driver_version = detail::get_driver_version(devices_[device]); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "device_driver_version", driver_version })); + std::vector driver_versions{}; + driver_versions.reserve(comm_.size()); + + if (comm_.size() > 1) { + // use MPI rank specific command line output + for (const queue_type &device : devices_) { + device_names.emplace_back(detail::get_device_name(device)); + // get the target platform's driver version + driver_versions.emplace_back(detail::get_driver_version(device)); + } + + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::opencl, target_, device_names, detail::create_jit_report(info)); + } else { + // use more detailed single rank command line output + 
plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\nOpenCL kernel JIT compilation done in {}.\n", + info.duration); + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing OpenCL (target version: {}) as backend.\n", + detail::get_opencl_target_version()); + if (target == target_platform::automatic) { + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Using {} as automatic target platform.\n", + target_); + } + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Found {} OpenCL device(s) for the target platform {}:\n", + devices_.size(), + target_); + + for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { + const std::string device_name = detail::get_device_name(devices_[device]); + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + " [{}, {}]\n", + device, + device_name); + device_names.emplace_back(device_name); + + // get the target platform's driver version + driver_versions.emplace_back(detail::get_driver_version(devices_[device])); + } } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "opencl_target_version", detail::get_opencl_target_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "opencl_library", std::string{ PLSSVM_OPENCL_LIBRARY } })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "device_driver_version", driver_versions })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::opencl })); + 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "jit_compilation_time", info.duration })); // sanity checks for the number of the OpenCL kernels PLSSVM_ASSERT(std::all_of(devices_.begin(), devices_.end(), [](const queue_type &queue) { return queue.kernels.size() == 13; }), diff --git a/src/plssvm/backends/OpenCL/detail/jit_info.cpp b/src/plssvm/backends/OpenCL/detail/jit_info.cpp new file mode 100644 index 000000000..118925dac --- /dev/null +++ b/src/plssvm/backends/OpenCL/detail/jit_info.cpp @@ -0,0 +1,41 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/backends/OpenCL/detail/jit_info.hpp" + +#include "fmt/chrono.h" // format std::chrono types +#include "fmt/format.h" // fmt::format + +#include // std::chrono::milliseconds +#include // std::ostream +#include // std::string + +namespace plssvm::opencl::detail { + +std::ostream &operator<<(std::ostream &out, const jit_info::caching_status status) { + switch (status) { + case jit_info::caching_status::success: + return out << "success"; + case jit_info::caching_status::error_no_cached_files: + return out << "no cached files exist (checksum missmatch)"; + case jit_info::caching_status::error_invalid_number_of_cached_files: + return out << "invalid number of cached files"; + } + return out << "unknown"; +} + +std::string create_jit_report(const jit_info &info) { + std::string report = fmt::format("{}; ", info.duration); + if (info.use_ptx_inline) { + report += "PTX inline; "; + } + report += fmt::format("cache: {} ({})", info.cache_state, info.cache_dir); + return report; +} + +} // namespace plssvm::opencl::detail diff --git a/src/plssvm/backends/OpenCL/detail/utility.cpp b/src/plssvm/backends/OpenCL/detail/utility.cpp index b29b32be4..b1113016c 100644 --- a/src/plssvm/backends/OpenCL/detail/utility.cpp +++ b/src/plssvm/backends/OpenCL/detail/utility.cpp @@ -11,11 +11,12 @@ #include "plssvm/backends/OpenCL/detail/command_queue.hpp" // plssvm::opencl::detail::command_queue #include "plssvm/backends/OpenCL/detail/context.hpp" // plssvm::opencl::detail::context #include "plssvm/backends/OpenCL/detail/error_code.hpp" // plssvm::opencl::detail::error_code +#include "plssvm/backends/OpenCL/detail/jit_info.hpp" // plssvm::opencl::detail::jit_info #include "plssvm/backends/OpenCL/detail/kernel.hpp" // plssvm::opencl::detail::compute_kernel_name, plssvm::opencl::detail::kernel #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/detail/arithmetic_type_name.hpp" 
// plssvm::detail::arithmetic_type_name #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/sha256.hpp" // plssvm::detail::sha256 #include "plssvm/detail/string_conversion.hpp" // plssvm::detail::extract_first_integer_from_string #include "plssvm/detail/string_utility.hpp" // plssvm::detail::replace_all, plssvm::detail::to_lower_case, plssvm::detail::contains @@ -23,6 +24,7 @@ #include "plssvm/detail/utility.hpp" // plssvm::detail::erase_if #include "plssvm/exceptions/exceptions.hpp" // plssvm::platform_devices_empty #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -38,6 +40,7 @@ #include // std::count_if #include // std::array +#include // std::chrono::{steady_clock, duration_cast, milliseconds} #include // std::size_t #include // std::filesystem::{path, temp_directory_path, exists, directory_iterator, directory_entry, begin, end} #include // std::ifstream, std::ofstream @@ -208,14 +211,17 @@ std::vector> kernel_type_to_function return kernels; } -std::vector create_command_queues(const std::vector &contexts, const kernel_function_type kernel_function, const std::vector> &kernel_names) { +std::pair, jit_info> create_command_queues(const mpi::communicator &comm, const std::vector &contexts, const kernel_function_type kernel_function) { + jit_info info{}; + const auto jit_start_time = std::chrono::steady_clock::now(); + std::vector queues; for (std::vector::size_type device = 0; device < contexts[0].devices.size(); ++device) { queues.emplace_back(contexts[0], contexts[0].devices[device]); } PLSSVM_ASSERT(!queues.empty(), "At least one command queue must be 
available!"); - const auto cl_build_program_error_message = [](cl_program prog, cl_device_id device, const std::size_t device_idx) { + const auto cl_build_program_error_message = [&comm](cl_program prog, cl_device_id device, const std::size_t device_idx) { // determine the size of the log std::size_t log_size{}; PLSSVM_OPENCL_ERROR_CHECK(clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size), "error retrieving the program build log size") @@ -225,7 +231,7 @@ std::vector create_command_queues(const std::vector &con // get the log PLSSVM_OPENCL_ERROR_CHECK(clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr), "error retrieving the program build log") // print the log - std::cerr << fmt::format("error building OpenCL program on device {}:\n{}", device_idx, log) << std::endl; + std::cerr << fmt::format("error building OpenCL program on device {} on MPI rank {}:\n{}", device_idx, comm.rank(), log) << std::endl; } }; @@ -244,8 +250,12 @@ std::vector create_command_queues(const std::vector &con const bool use_inline_assembly = ::plssvm::detail::contains(::plssvm::detail::as_lower_case(platform_vendor), "nvidia"); if (use_inline_assembly) { compile_options += " -DPLSSVM_USE_NVIDIA_PTX_INLINE_ASSEMBLY"; - plssvm::detail::log(verbosity_level::full, - "Enabling atomicAdd acceleration using PTX inline assembly.\n"); + if (comm.size() == 1) { + plssvm::detail::log_untracked(verbosity_level::full, + comm, + "Enabling atomicAdd acceleration using PTX inline assembly.\n"); + } + info.use_ptx_inline = true; } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "opencl", "use_inline_assembly", use_inline_assembly })); #endif @@ -383,47 +393,33 @@ std::vector create_command_queues(const std::vector &con std::vector binaries_ptr(binaries.size()); // create caching folder in the temporary directory and change the permissions such that everybody has read/write access - 
const std::filesystem::path cache_dir_name = std::filesystem::temp_directory_path() / "plssvm_opencl_cache" / checksum; - - // potential reasons why OpenCL caching could fail - enum class caching_status { - success, - error_no_cached_files, - error_invalid_number_of_cached_files, - }; - // message associated with the failed caching reason - const auto caching_status_to_string = [](const caching_status status) { - switch (status) { - case caching_status::error_no_cached_files: - return "no cached files exist (checksum missmatch)"; - case caching_status::error_invalid_number_of_cached_files: - return "invalid number of cached files"; - default: - return ""; - } - }; + const std::filesystem::path cache_dir_name = std::filesystem::temp_directory_path() / "plssvm_opencl_cache" / checksum / fmt::format("rank_{}", comm.rank()); + info.cache_dir = cache_dir_name; // assume caching was successful - caching_status use_cached_binaries = caching_status::success; + info.cache_state = jit_info::caching_status::success; // check if cache directory exists if (!std::filesystem::exists(cache_dir_name)) { - use_cached_binaries = caching_status::error_no_cached_files; + info.cache_state = jit_info::caching_status::error_no_cached_files; } // if the cache directory exists, check the number of files - if (use_cached_binaries == caching_status::success) { + if (info.cache_state == jit_info::caching_status::success) { // get directory iterator auto dirIter = std::filesystem::directory_iterator(cache_dir_name); // get files in directory -> account for stored preprocessed source file if (static_cast(std::count_if(std::filesystem::begin(dirIter), std::filesystem::end(dirIter), [](const auto &entry) { return entry.is_regular_file(); })) != contexts[0].devices.size() + 1) { - use_cached_binaries = caching_status::error_invalid_number_of_cached_files; + info.cache_state = jit_info::caching_status::error_invalid_number_of_cached_files; } } - if (use_cached_binaries != caching_status::success) { 
- plssvm::detail::log(verbosity_level::full, - "Building OpenCL kernels from source (reason: {}).\n", - caching_status_to_string(use_cached_binaries)); + if (info.cache_state != jit_info::caching_status::success) { + if (comm.size() == 1) { + plssvm::detail::log_untracked(verbosity_level::full, + comm, + "Building OpenCL kernels from source (reason: {}).\n", + info.cache_state); + } // create and build program cl_program program = clCreateProgramWithSource(contexts[0], 1, &kernel_src_ptr, nullptr, &err); @@ -472,18 +468,25 @@ std::vector create_command_queues(const std::vector &con std::ofstream out{ cache_dir_name / "processed_source.cl" }; out << kernel_src_string; } - plssvm::detail::log(verbosity_level::full, - "Cached OpenCL kernel binaries in {}.\n", - cache_dir_name); + + if (comm.size() == 1) { + plssvm::detail::log_untracked(verbosity_level::full, + comm, + "Cached OpenCL kernel binaries in {}.\n", + cache_dir_name); + } // release resource if (program) { PLSSVM_OPENCL_ERROR_CHECK(clReleaseProgram(program), "error releasing OpenCL program resources") } } else { - plssvm::detail::log(verbosity_level::full, - "Using cached OpenCL kernel binaries from {}.\n", - cache_dir_name); + if (comm.size() == 1) { + plssvm::detail::log_untracked(verbosity_level::full, + comm, + "Using cached OpenCL kernel binaries from {}.\n", + cache_dir_name); + } const auto common_read_file = [](const std::filesystem::path &file) -> std::pair, std::size_t> { std::ifstream f{ file }; @@ -531,7 +534,7 @@ std::vector create_command_queues(const std::vector &con // build all kernels, one for each device for (std::vector::size_type device = 0; device < contexts[0].devices.size(); ++device) { - for (const std::pair &name : kernel_names) { + for (const std::pair &name : detail::kernel_type_to_function_names()) { // create kernel queues[device].add_kernel(name.first, kernel{ clCreateKernel(binary_program, name.second.c_str(), &err) }); PLSSVM_OPENCL_ERROR_CHECK(err, fmt::format("error 
creating OpenCL kernel {} for device {}", name.second, device)) @@ -543,7 +546,9 @@ std::vector create_command_queues(const std::vector &con PLSSVM_OPENCL_ERROR_CHECK(clReleaseProgram(binary_program), "error releasing OpenCL binary program resources") } - return queues; + const auto jit_end_time = std::chrono::steady_clock::now(); + info.duration = std::chrono::duration_cast(jit_end_time - jit_start_time); + return std::make_pair(std::move(queues), info); } } // namespace plssvm::opencl::detail diff --git a/src/plssvm/backends/OpenMP/csvm.cpp b/src/plssvm/backends/OpenMP/csvm.cpp index 10881bc5c..5ad1f175c 100644 --- a/src/plssvm/backends/OpenMP/csvm.cpp +++ b/src/plssvm/backends/OpenMP/csvm.cpp @@ -17,8 +17,8 @@ #include "plssvm/backends/OpenMP/kernel/predict_kernel.hpp" // plssvm::openmp::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::{move_only_any, move_only_any_cast} #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY @@ -26,6 +26,8 @@ #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // 
plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/solver_types.hpp" // plssvm::solver_type @@ -37,6 +39,7 @@ #include // std::fma #include // std::size_t +#include // std::memset #include // std::tuple, std::make_tuple #include // std::pair, std::make_pair, std::move #include // std::get @@ -54,15 +57,28 @@ csvm::csvm(const target_platform target) { throw backend_exception{ "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" }; #endif - plssvm::detail::log(verbosity_level::full, - "\nUsing OpenMP ({}) as backend with {} thread(s).\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "openmp_version", detail::get_openmp_version() }, - plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::openmp })); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::cpu })); - // update the target platform target_ = plssvm::target_platform::cpu; + + if (comm_.size() > 1) { + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::openmp, target_); + } else { + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing OpenMP ({}) as backend with {} thread(s).\n", + detail::get_openmp_version(), + detail::get_num_threads()); + } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "openmp_version", detail::get_openmp_version() })); + 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::openmp })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); } csvm::~csvm() = default; @@ -86,47 +102,60 @@ std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1); + // update the data distribution: only the upper triangular kernel matrix is used + // note: account for the dimensional reduction + data_distribution_ = std::make_unique<::plssvm::detail::triangular_data_distribution>(comm_, A.num_rows() - 1, this->num_available_devices()); + // get the triangular data distribution + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices()); const real_type cost = real_type{ 1.0 } / params.cost; - switch (solver) { - case solver_type::automatic: - // unreachable - break; - case solver_type::cg_explicit: - { - const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() }; - std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular 
matrix - switch (params.kernel_type) { - case kernel_function_type::linear: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost); - break; - case kernel_function_type::polynomial: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - } + if (dist.place_specific_num_rows(0) > std::size_t{ 0 }) { + switch (solver) { + case solver_type::automatic: + // unreachable + break; + case solver_type::cg_explicit: + { + // calculate the number of data points this device is responsible for + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); - kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) }; - } - break; - case solver_type::cg_implicit: - { - // simply return data since in implicit we don't assembly the kernel matrix here! 
- kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) }; - } - break; + // get the offset of the data points this device is responsible for + const std::size_t row_offset = dist.place_row_offset(0); + + std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix + switch (params.kernel_type) { + case kernel_function_type::linear: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost); + break; + case kernel_function_type::polynomial: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + } + + kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) }; + } + break; + case solver_type::cg_implicit: + { + // simply return data since in implicit we don't assembly the kernel matrix here! 
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) }; + } + break; + } } return kernel_matrices_parts; @@ -142,49 +171,79 @@ void csvm::blas_level_3(const solver_type solver, const real_type alpha, const s PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape()); PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding()); - switch (solver) { - case solver_type::automatic: - // unreachable - break; - case solver_type::cg_explicit: - { - const std::size_t num_rhs = B.shape().x; - const std::size_t num_rows = B.shape().y; - - const auto &explicit_A = ::plssvm::detail::move_only_any_cast &>(A.front()); - PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!"); - detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C); - } - break; - case solver_type::cg_implicit: - { - const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast, parameter, std::vector, real_type> &>(A.front()); - PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!"); - PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); - const real_type cost = real_type{ 1.0 } / params.cost; - - switch (params.kernel_type) { - case kernel_function_type::linear: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C); - break; - case kernel_function_type::polynomial: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, 
std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; + using namespace operators; + + // get the triangular data distribution + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + + // check whether the current device is responsible for at least one data point! + if (dist.place_specific_num_rows(0) > std::size_t{ 0 }) { + if (!comm_.is_main_rank()) { + // MPI rank 0 always touches all values in C -> other MPI ranks do not need C + std::memset(C.data(), 0, C.size_padded() * sizeof(real_type)); + } + + // calculate the number of data points this device is responsible for + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + // get the offset of the data points this device is responsible for + const std::size_t row_offset = dist.place_row_offset(0); + + const std::size_t num_rhs = B.shape().x; + const std::size_t num_rows = B.shape().y; + + switch (solver) { + case solver_type::automatic: + // unreachable + break; + case solver_type::cg_explicit: + { + const auto &explicit_A = ::plssvm::detail::move_only_any_cast &>(A.front()); + PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!"); + + detail::device_kernel_symm(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, explicit_A, B, beta, C); + + const std::size_t num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + if (num_mirror_rows > std::size_t{ 0 }) { + detail::device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, explicit_A, B, beta, C); + } } - } - break; + break; + case 
solver_type::cg_implicit: + { + const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast, parameter, std::vector, real_type> &>(A.front()); + PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!"); + PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); + const real_type cost = real_type{ 1.0 } / params.cost; + + if (comm_.is_main_rank()) { + // we do not perform the beta scale in C in the cg_implicit device kernel + // -> calculate it using a separate kernel (always on device 0 and MPI rank 0!) + C *= beta; + } + + switch (params.kernel_type) { + case kernel_function_type::linear: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C); + break; + case kernel_function_type::polynomial: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + } + } + break; + } } } @@ -213,6 +272,7 @@ aos_matrix csvm::predict_values(const parameter ¶ms, // defined sizes const std::size_t num_classes = alpha.num_rows(); + const std::size_t num_sv = support_vectors.num_rows(); 
const std::size_t num_predict_points = predict_points.num_rows(); const std::size_t num_features = predict_points.num_cols(); @@ -222,33 +282,51 @@ aos_matrix csvm::predict_values(const parameter ¶ms, if (params.kernel_type == kernel_function_type::linear) { // special optimization for the linear kernel function if (w.empty()) { + // update the data distribution to account for the support vectors + data_distribution_ = std::make_unique<::plssvm::detail::rectangular_data_distribution>(comm_, num_sv, this->num_available_devices()); + // fill w vector w = soa_matrix{ plssvm::shape{ num_classes, num_features }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } }; - detail::device_kernel_w_linear(w, alpha, support_vectors); + + if (data_distribution_->place_specific_num_rows(0) > std::size_t{ 0 }) { + const std::size_t device_specific_num_sv = data_distribution_->place_specific_num_rows(0); + const std::size_t sv_offset = data_distribution_->place_row_offset(0); + + detail::device_kernel_w_linear(w, alpha, support_vectors, device_specific_num_sv, sv_offset); + } + + // reduce w on all MPI ranks + comm_.allreduce_inplace(w); } } - // call the predict kernels - switch (params.kernel_type) { - case kernel_function_type::linear: - // predict the values using the w vector - detail::device_kernel_predict_linear(out, w, rho, predict_points); - break; - case kernel_function_type::polynomial: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, 
std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; + data_distribution_ = std::make_unique<::plssvm::detail::rectangular_data_distribution>(comm_, num_predict_points, this->num_available_devices()); + const std::size_t device_specific_num_predict_points = data_distribution_->place_specific_num_rows(0); + const std::size_t row_offset = data_distribution_->place_row_offset(0); + + if (data_distribution_->place_specific_num_rows(0) > std::size_t{ 0 }) { + // call the predict kernels + switch (params.kernel_type) { + case kernel_function_type::linear: + // predict the values using the w vector + detail::device_kernel_predict_linear(out, w, rho, predict_points, device_specific_num_predict_points, row_offset); + break; + case kernel_function_type::polynomial: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + } } return out; diff --git 
a/src/plssvm/backends/SYCL/AdaptiveCpp/CMakeLists.txt b/src/plssvm/backends/SYCL/AdaptiveCpp/CMakeLists.txt index ee139156a..7dc8bb824 100644 --- a/src/plssvm/backends/SYCL/AdaptiveCpp/CMakeLists.txt +++ b/src/plssvm/backends/SYCL/AdaptiveCpp/CMakeLists.txt @@ -6,6 +6,13 @@ message(CHECK_START "Checking for AdaptiveCpp as SYCL implementation") +# we ignore ACPP_TARGETS and overwrite it with our own values +if (DEFINED ENV{ACPP_TARGETS}) + message( + WARNING "The environment variable \"ACPP_TARGETS\" is set but will be ignored and overwritten by the values provided via \"PLSSVM_TARGET_PLATFORMS\"." + ) +endif () + option(PLSSVM_SYCL_BACKEND_ADAPTIVECPP_USE_GENERIC_SSCP "Enables the generic SSCP target for new AdaptiveCpp versions." ON) if (PLSSVM_SYCL_BACKEND_ADAPTIVECPP_USE_GENERIC_SSCP) @@ -18,15 +25,13 @@ else () list(TRANSFORM ACPP_TARGETS REPLACE "cpu" "omp.accelerated") list(TRANSFORM ACPP_TARGETS REPLACE "nvidia" "cuda") list(TRANSFORM ACPP_TARGETS REPLACE "amd" "hip") - list(TRANSFORM ACPP_TARGETS REPLACE "intel" "spirv") # remove CPU and Intel GPU target architectures since they are not supported when using AdaptiveCpp if (DEFINED PLSSVM_CPU_TARGET_ARCHS AND PLSSVM_NUM_CPU_TARGET_ARCHS GREATER 0) string(REPLACE ";" "," PLSSVM_CPU_TARGET_ARCHS_COMMA "${PLSSVM_CPU_TARGET_ARCHS}") string(REPLACE ":${PLSSVM_CPU_TARGET_ARCHS_COMMA}" "" ACPP_TARGETS "${ACPP_TARGETS}") endif () if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) - string(REPLACE ";" "," PLSSVM_INTEL_TARGET_ARCHS_COMMA "${PLSSVM_INTEL_TARGET_ARCHS}") - string(REPLACE ":${PLSSVM_INTEL_TARGET_ARCHS_COMMA}" "" ACPP_TARGETS "${ACPP_TARGETS}") + message(FATAL_ERROR "Intel GPUs not supported with AdaptiveCpp's old compilation flow.") endif () endif () @@ -35,6 +40,7 @@ find_package(AdaptiveCpp CONFIG) if (AdaptiveCpp_FOUND) message(CHECK_PASS "found") message(STATUS "Setting ACPP_TARGETS to \"${ACPP_TARGETS}\".") + set_local_and_parent(ACPP_TARGETS "${ACPP_TARGETS}") 
append_local_and_parent(PLSSVM_SYCL_BACKEND_FOUND_IMPLEMENTATIONS "adaptivecpp") # set AdaptiveCpp specific targets @@ -43,7 +49,7 @@ if (AdaptiveCpp_FOUND) ) # set target properties - set(PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME plssvm-SYCL_adaptivecpp CACHE INTERNAL "") + set(PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME plssvm-SYCL_AdaptiveCpp CACHE INTERNAL "") add_library(${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME} SHARED ${PLSSVM_SYCL_SOURCES} ${PLSSVM_SYCL_ADAPTIVECPP_SOURCES}) append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL "${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME}") @@ -59,6 +65,7 @@ if (AdaptiveCpp_FOUND) target_compile_definitions(${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME} PUBLIC PLSSVM_SYCL_BACKEND_HAS_ADAPTIVECPP) target_compile_definitions(${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME} PRIVATE PLSSVM_SYCL_BACKEND_COMPILER=1) target_compile_definitions(${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME} PRIVATE PLSSVM_SYCL_BACKEND_COMPILER_NAME="AdaptiveCpp") + target_compile_definitions(${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME} PRIVATE PLSSVM_ACPP_TARGETS="${ACPP_TARGETS}") if (PLSSVM_SYCL_BACKEND_ADAPTIVECPP_USE_GENERIC_SSCP) target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_SYCL_BACKEND_ADAPTIVECPP_USE_GENERIC_SSCP) diff --git a/src/plssvm/backends/SYCL/AdaptiveCpp/csvm.cpp b/src/plssvm/backends/SYCL/AdaptiveCpp/csvm.cpp index 464322e3a..eaf394dd2 100644 --- a/src/plssvm/backends/SYCL/AdaptiveCpp/csvm.cpp +++ b/src/plssvm/backends/SYCL/AdaptiveCpp/csvm.cpp @@ -23,13 +23,17 @@ #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log.hpp" // plssvm::detail::log 
+#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry #include "plssvm/detail/utility.hpp" // plssvm::detail::get_system_memory #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -85,54 +89,76 @@ void csvm::init(const target_platform target) { // At this point, target_ may NEVER be target_platform::automatic! 
PLSSVM_ASSERT(target_ != target_platform::automatic, "At this point, the target platform must be determined and must NOT be automatic!"); + // throw exception if no devices for the requested target could be found + if (devices_.empty()) { + throw backend_exception{ fmt::format("SYCL backend selected but no devices for the target {} were found!", target_) }; + } + // set correct kernel invocation type if "automatic" has been provided if (invocation_type_ == sycl::kernel_invocation_type::automatic) { // always use nd_range for AdaptiveCpp invocation_type_ = sycl::kernel_invocation_type::nd_range; if (target_ == target_platform::cpu) { -#if !defined(__HIPSYCL_USE_ACCELERATED_CPU__) && defined(__HIPSYCL_ENABLE_OMPHOST_TARGET__) - plssvm::detail::log(verbosity_level::full | verbosity_level::warning, - "WARNING: the AdaptiveCpp automatic target for the CPU is set to nd_range, but AdaptiveCpp hasn't been build with the \"omp.accelerated\" compilation flow resulting in major performance losses!\n"); +#if !defined(__ACPP_USE_ACCELERATED_CPU__) && defined(__ACPP_ENABLE_OMPHOST_TARGET__) + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: the AdaptiveCpp automatic target for the CPU is set to nd_range, but AdaptiveCpp hasn't been build with the \"omp.accelerated\" compilation flow resulting in major performance losses!\n"); #endif } } - plssvm::detail::log(verbosity_level::full, - "\nUsing AdaptiveCpp ({}) as SYCL backend with the kernel invocation type \"{}\" for the svm_kernel.\n", - detail::get_adaptivecpp_version_short(), - plssvm::detail::tracking::tracking_entry{ "backend", "sycl_kernel_invocation_type", invocation_type_ }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "adaptivecpp_version", detail::get_adaptivecpp_version() })); - if (target == target_platform::automatic) { - plssvm::detail::log(verbosity_level::full, - "Using {} as automatic target 
platform.\n", - target_); - } - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::sycl })); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "sycl_implementation_type", plssvm::sycl::implementation_type::adaptivecpp })); + std::vector device_names{}; + device_names.reserve(devices_.size()); - // throw exception if no devices for the requested target could be found - if (devices_.empty()) { - throw backend_exception{ fmt::format("SYCL backend selected but no devices for the target {} were found!", target_) }; - } + if (comm_.size() > 1) { + // use MPI rank specific command line output + for (const queue_type &device : devices_) { + device_names.emplace_back(device.impl->sycl_queue.get_device().template get_info<::sycl::info::device::name>()); + } - // print found SYCL devices - plssvm::detail::log(verbosity_level::full, - "Found {} SYCL device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); - std::vector device_names; - device_names.reserve(devices_.size()); - for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { - const std::string device_name = devices_[device].impl->sycl_queue.get_device().template get_info<::sycl::info::device::name>(); - plssvm::detail::log(verbosity_level::full, - " [{}, {}]\n", - device, - device_name); - device_names.emplace_back(device_name); + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::sycl, target_, device_names, fmt::format("{}", invocation_type_)); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing AdaptiveCpp ({}; {}) as SYCL backend with the kernel 
invocation type \"{}\" for the svm_kernel.\n", + detail::get_adaptivecpp_version_short(), + PLSSVM_ACPP_TARGETS, + invocation_type_); + if (target == target_platform::automatic) { + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Using {} as automatic target platform.\n", + target_); + } + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Found {} SYCL device(s) for the target platform {}:\n", + devices_.size(), + target_); + + for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { + const std::string device_name = devices_[device].impl->sycl_queue.get_device().template get_info<::sycl::info::device::name>(); + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + " [{}, {}]\n", + device, + device_name); + device_names.emplace_back(device_name); + } } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "adaptivecpp_version", detail::get_adaptivecpp_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::sycl })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "sycl_implementation_type", plssvm::sycl::implementation_type::adaptivecpp })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "sycl_kernel_invocation_type", invocation_type_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() })); 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "acpp_targets", PLSSVM_ACPP_TARGETS })); } csvm::~csvm() { @@ -152,9 +178,9 @@ std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { const ::plssvm::detail::memory_size adaptivecpp_global_mem_size{ static_cast(devices_[device_id].impl->sycl_queue.get_device().get_info<::sycl::info::device::global_mem_size>()) }; if (target_ == target_platform::cpu) { - plssvm::detail::log(verbosity_level::full | verbosity_level::warning, - "WARNING: the returned 'global_mem_size' for AdaptiveCpp targeting the CPU device {} is nonsensical ('std::numeric_limits::max()'). Using 'get_system_memory()' instead.\n", - device_id); + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: the returned 'global_mem_size' for AdaptiveCpp targeting the CPU device {} is nonsensical ('std::numeric_limits::max()'). 
Using 'get_system_memory()' instead.\n", + device_id); res[device_id] = std::min(adaptivecpp_global_mem_size, ::plssvm::detail::get_system_memory()); } else { res[device_id] = adaptivecpp_global_mem_size; diff --git a/src/plssvm/backends/SYCL/AdaptiveCpp/detail/utility.cpp b/src/plssvm/backends/SYCL/AdaptiveCpp/detail/utility.cpp index 7e2058a99..b3bfe7b67 100644 --- a/src/plssvm/backends/SYCL/AdaptiveCpp/detail/utility.cpp +++ b/src/plssvm/backends/SYCL/AdaptiveCpp/detail/utility.cpp @@ -104,7 +104,7 @@ std::string get_adaptivecpp_version_short() { } std::string get_adaptivecpp_version() { - return ::hipsycl::sycl::detail::version_string(); + return ::acpp::sycl::detail::version_string(); } } // namespace plssvm::adaptivecpp::detail diff --git a/src/plssvm/backends/SYCL/CMakeLists.txt b/src/plssvm/backends/SYCL/CMakeLists.txt index dc96f644d..b7a0fb119 100644 --- a/src/plssvm/backends/SYCL/CMakeLists.txt +++ b/src/plssvm/backends/SYCL/CMakeLists.txt @@ -120,6 +120,11 @@ append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_SYCL_BACKEND_LIBRARY_ # set manpage string set_local_and_parent(PLSSVM_SYCL_BACKEND_NAME_LIST "automatic;${PLSSVM_SYCL_BACKEND_FOUND_IMPLEMENTATIONS}") +# populate transformed ACPP_TARGETS for tests +if (TARGET ${PLSSVM_SYCL_BACKEND_ADAPTIVECPP_LIBRARY_NAME}) + set_local_and_parent(ACPP_TARGETS "${ACPP_TARGETS}") +endif () + # generate summary string include(${PROJECT_SOURCE_DIR}/cmake/assemble_summary_string.cmake) set(PLSSVM_SYCL_BACKEND_SUMMARY_STRINGS "") @@ -158,7 +163,8 @@ foreach (SYCL_IMPLEMENTATION ${PLSSVM_SYCL_BACKEND_FOUND_IMPLEMENTATIONS}) set(SYCL_IMPLEMENTATION "icpx/${SYCL_IMPLEMENTATION}") endif () - list(APPEND PLSSVM_SYCL_BACKEND_SUMMARY_STRINGS " - SYCL (${SYCL_IMPLEMENTATION}):${PLSSVM_SYCL_BACKEND_SUMMARY_STRING_ARCHS}") + string(REPLACE ";" "\;" PLSSVM_SYCL_BACKEND_SUMMARY_STRING_ARCHS " - SYCL (${SYCL_IMPLEMENTATION}):${PLSSVM_SYCL_BACKEND_SUMMARY_STRING_ARCHS}") + list(APPEND PLSSVM_SYCL_BACKEND_SUMMARY_STRINGS 
"${PLSSVM_SYCL_BACKEND_SUMMARY_STRING_ARCHS}") endforeach () set(PLSSVM_SYCL_BACKEND_SUMMARY_STRINGS "${PLSSVM_SYCL_BACKEND_SUMMARY_STRINGS}" PARENT_SCOPE) diff --git a/src/plssvm/backends/SYCL/DPCPP/CMakeLists.txt b/src/plssvm/backends/SYCL/DPCPP/CMakeLists.txt index d36dadc09..d3e53ba83 100644 --- a/src/plssvm/backends/SYCL/DPCPP/CMakeLists.txt +++ b/src/plssvm/backends/SYCL/DPCPP/CMakeLists.txt @@ -31,7 +31,7 @@ if (PLSSVM_SYCL_BACKEND_CHECK_FOR_DPCPP_COMPILER) ) # set target properties - set(PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME plssvm-SYCL_dpcpp CACHE INTERNAL "") + set(PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME plssvm-SYCL_DPCPP CACHE INTERNAL "") add_library(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} SHARED ${PLSSVM_SYCL_SOURCES} ${PLSSVM_SYCL_DPCPP_SOURCES}) append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL "${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME}") @@ -76,87 +76,9 @@ if (PLSSVM_SYCL_BACKEND_CHECK_FOR_DPCPP_COMPILER) -Wno-ctor-dtor-privacy ) - set(PLSSVM_DPCPP_FSYCL_TARGETS "") - # cpu targets - if (DEFINED PLSSVM_CPU_TARGET_ARCHS) - # assemble -fsycl-targets - list(APPEND PLSSVM_DPCPP_FSYCL_TARGETS "spir64_x86_64") - endif () - # nvidia targets - if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - # assemble -fsycl-targets - list(APPEND PLSSVM_DPCPP_FSYCL_TARGETS "nvptx64-nvidia-cuda") - # add lineinfo for easier profiling - target_link_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xcuda-ptxas -lineinfo) - # add verbose kernel compilation information to output if in Debug mode - target_link_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE $<$:-Xcuda-ptxas --verbose>) - endif () - # amd targets - if (DEFINED PLSSVM_AMD_TARGET_ARCHS) - # assemble -fsycl-targets - list(APPEND PLSSVM_DPCPP_FSYCL_TARGETS "amdgcn-amd-amdhsa") - # add target specific flags for AOT -> must always be specified von amd targets - if (NOT PLSSVM_NUM_AMD_TARGET_ARCHS EQUAL 1) - message(SEND_ERROR "DPC++ currently only supports a single AMD architecture specification but 
${PLSSVM_NUM_AMD_TARGET_ARCHS} were provided!") - endif () - target_compile_options( - ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS} - ) - target_link_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) - endif () - # set -fsycl-targets - list(JOIN PLSSVM_DPCPP_FSYCL_TARGETS "," PLSSVM_DPCPP_FSYCL_TARGETS_STRING) - if (NOT PLSSVM_DPCPP_FSYCL_TARGETS_STRING STREQUAL "") - target_compile_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -fsycl-targets=${PLSSVM_DPCPP_FSYCL_TARGETS_STRING}) - target_link_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -fsycl-targets=${PLSSVM_DPCPP_FSYCL_TARGETS_STRING}) - endif () - - # add option for DPC++ Ahead-of-Time (AOT) compilation - option(PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT "Enables Ahead-of-Time compilation for DPC++." ON) - if (PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT) - message(STATUS "Enabled Ahead-of-Time (AOT) compilation with DPC++.") - target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT) - target_compile_definitions(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PUBLIC PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT) - # set AOT compiler flags cpu targets - if (DEFINED PLSSVM_CPU_TARGET_ARCHS) - # add target specific flags for AOT - if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) - target_compile_options( - ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}" - ) - target_link_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") - endif () - endif () - # nvidia targets - if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - # add target specific flags for AOT - if (NOT PLSSVM_NUM_NVIDIA_TARGET_ARCHS EQUAL 1) - message( - SEND_ERROR - "DPC++ currently only supports a single 
NVIDIA architecture specification for AOT but ${PLSSVM_NUM_NVIDIA_TARGET_ARCHS} were provided!" - ) - endif () - target_compile_options( - ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS} - ) - target_link_options( - ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS} - ) - endif () - # intel targets - if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) - target_compile_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -fsycl-targets=spir64_gen) - target_link_options(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -fsycl-targets=spir64_gen) - # add target specific flags for AOT - list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," PLSSVM_INTEL_TARGET_ARCHS_STRING) - target_compile_options( - ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}" - ) - target_link_options( - ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}" - ) - endif () - endif () + # assemble the icpx specific compiler flags and add them to the DPCPP backend target + include(${PROJECT_SOURCE_DIR}/cmake/assemble_icpx_sycl_target_flags.cmake) + assemble_icpx_sycl_target_flags(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PRIVATE) # be able to choose between the Level-Zero and OpenCL DPC++ backend for CPUs or Intel GPUs option(PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO "Enable DPC++'s Level-Zero backend in favor of the OpenCL backend." 
ON) @@ -170,18 +92,6 @@ if (PLSSVM_SYCL_BACKEND_CHECK_FOR_DPCPP_COMPILER) target_compile_definitions(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PUBLIC PLSSVM_SYCL_BACKEND_DPCPP_BACKEND_TYPE="opencl") endif () - # be able to choose between the HIP and OpenCL DPC++ backend for AMD GPUs - option(PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_USE_HIP "Enable DPC++'s HIP backend in favor of the OpenCL backend for AMD GPUs." ON) - if (PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_USE_HIP) - message(STATUS "Using DPC++'s HIP backend for AMD GPUs.") - target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE="hip") - target_compile_definitions(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PUBLIC PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE="hip") - else () - message(STATUS "Using DPC++'s OpenCL backend for AMD GPUs.") - target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE="opencl") - target_compile_definitions(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PUBLIC PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE="opencl") - endif () - target_link_libraries(${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME}) else () message(CHECK_FAIL "not found") diff --git a/src/plssvm/backends/SYCL/DPCPP/csvm.cpp b/src/plssvm/backends/SYCL/DPCPP/csvm.cpp index e80b697bc..2d3c85ec8 100644 --- a/src/plssvm/backends/SYCL/DPCPP/csvm.cpp +++ b/src/plssvm/backends/SYCL/DPCPP/csvm.cpp @@ -23,12 +23,15 @@ #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include 
"plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -84,48 +87,70 @@ void csvm::init(const target_platform target) { // At this point, target_ may NEVER be target_platform::automatic! PLSSVM_ASSERT(target_ != target_platform::automatic, "At this point, the target platform must be determined and must NOT be automatic!"); + // throw exception if no devices for the requested target could be found + if (devices_.empty()) { + throw backend_exception{ fmt::format("SYCL backend selected but no devices for the target {} were found!", target_) }; + } + // set correct kernel invocation type if "automatic" has been provided if (invocation_type_ == sycl::kernel_invocation_type::automatic) { // always use nd_range for DPC++ invocation_type_ = sycl::kernel_invocation_type::nd_range; } - plssvm::detail::log(verbosity_level::full, - "\nUsing DPC++ ({}; {}) as SYCL backend with the kernel invocation type \"{}\" for the svm_kernel.\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "dpcpp_version", detail::get_dpcpp_version() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "dpcpp_timestamp_version", detail::get_dpcpp_timestamp_version() }, - plssvm::detail::tracking::tracking_entry{ "backend", "sycl_kernel_invocation_type", invocation_type_ }); - 
if (target == target_platform::automatic) { - plssvm::detail::log(verbosity_level::full, - "Using {} as automatic target platform.\n", - target_); - } - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::sycl })); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "sycl_implementation_type", plssvm::sycl::implementation_type::dpcpp })); + std::vector device_names{}; + device_names.reserve(devices_.size()); - // throw exception if no devices for the requested target could be found - if (devices_.empty()) { - throw backend_exception{ fmt::format("SYCL backend selected but no devices for the target {} were found!", target_) }; - } + if (comm_.size() > 1) { + // use MPI rank specific command line output + for (const queue_type &device : devices_) { + device_names.emplace_back(device.impl->sycl_queue.get_device().template get_info<::sycl::info::device::name>()); + } - // print found SYCL devices - plssvm::detail::log(verbosity_level::full, - "Found {} SYCL device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); - std::vector device_names; - device_names.reserve(devices_.size()); - for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { - const std::string device_name = devices_[device].impl->sycl_queue.get_device().template get_info<::sycl::info::device::name>(); - plssvm::detail::log(verbosity_level::full, - " [{}, {}]\n", - device, - device_name); - device_names.emplace_back(device_name); + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::sycl, target_, device_names, fmt::format("{}", invocation_type_)); + } else { + // use more detailed single rank command line output + 
plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing DPC++ ({}; {}) as SYCL backend with the kernel invocation type \"{}\" for the svm_kernel.\n", + detail::get_dpcpp_version(), + detail::get_dpcpp_timestamp_version(), + invocation_type_); + if (target == target_platform::automatic) { + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Using {} as automatic target platform.\n", + target_); + } + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "Found {} SYCL device(s) for the target platform {}:\n", + devices_.size(), + target_); + + for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { + const std::string device_name = devices_[device].impl->sycl_queue.get_device().template get_info<::sycl::info::device::name>(); + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + " [{}, {}]\n", + device, + device_name); + device_names.emplace_back(device_name); + } } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); + + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "dpcpp_version", detail::get_dpcpp_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "dpcpp_timestamp_version", detail::get_dpcpp_timestamp_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::sycl })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "sycl_implementation_type", plssvm::sycl::implementation_type::dpcpp })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "sycl_kernel_invocation_type", invocation_type_ })); + 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); } csvm::~csvm() { diff --git a/src/plssvm/backends/SYCL/DPCPP/detail/utility.cpp b/src/plssvm/backends/SYCL/DPCPP/detail/utility.cpp index 651a6c1e1..28742b23f 100644 --- a/src/plssvm/backends/SYCL/DPCPP/detail/utility.cpp +++ b/src/plssvm/backends/SYCL/DPCPP/detail/utility.cpp @@ -54,15 +54,8 @@ namespace plssvm::dpcpp::detail { platform_devices.insert({ target_platform::gpu_nvidia, device }); } else if ((::plssvm::detail::contains(vendor_string, "amd") || ::plssvm::detail::contains(vendor_string, "advanced micro devices")) && ::plssvm::detail::contains(available_target_platforms, target_platform::gpu_amd)) { - // select between DPC++'s OpenCL and HIP backend - std::ostringstream oss; - oss << device.get_backend(); -#if defined(PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE) - if (::plssvm::detail::contains(oss.str(), PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE)) { - platform_devices.insert({ target_platform::gpu_amd, device }); - } -#endif - } else if (::plssvm::detail::contains(vendor_string, "intel") || ::plssvm::detail::contains(available_target_platforms, target_platform::gpu_intel)) { + platform_devices.insert({ target_platform::gpu_amd, device }); + } else if (::plssvm::detail::contains(vendor_string, "intel") && ::plssvm::detail::contains(available_target_platforms, target_platform::gpu_intel)) { // select between DPC++'s OpenCL and Level-Zero backend #if defined(PLSSVM_SYCL_BACKEND_DPCPP_BACKEND_TYPE) // get platform name of current GPU 
device and convert it to all lower case diff --git a/src/plssvm/backends/stdpar/AdaptiveCpp/CMakeLists.txt b/src/plssvm/backends/stdpar/AdaptiveCpp/CMakeLists.txt index d4a84c5b9..24c581185 100644 --- a/src/plssvm/backends/stdpar/AdaptiveCpp/CMakeLists.txt +++ b/src/plssvm/backends/stdpar/AdaptiveCpp/CMakeLists.txt @@ -33,12 +33,43 @@ else () return() endif () +# we ignore ACPP_TARGETS and overwrite it with our own values +if (DEFINED ENV{ACPP_TARGETS}) + message( + WARNING "The environment variable \"ACPP_TARGETS\" is set but will be ignored and overwritten by the values provided via \"PLSSVM_TARGET_PLATFORMS\"." + ) +endif () + +option(PLSSVM_STDPAR_BACKEND_ACPP_USE_GENERIC_SSCP "Enables the generic SSCP target for new AdaptiveCpp versions." ON) + +if (PLSSVM_STDPAR_BACKEND_ACPP_USE_GENERIC_SSCP) + message(STATUS "Using the new AdaptiveCpp SSCP compilation flow.") + set(ACPP_TARGETS "generic" CACHE STRING "" FORCE) +else () + message(STATUS "Using the old AdaptiveCpp compilation flow.") + # reformat PLSSVM_TARGET_PLATFORMS to be usable with ACPP_TARGETS + set(ACPP_TARGETS "${PLSSVM_TARGET_PLATFORMS}" CACHE STRING "" FORCE) + list(TRANSFORM ACPP_TARGETS REPLACE "cpu" "omp.accelerated") + list(TRANSFORM ACPP_TARGETS REPLACE "nvidia" "cuda") + list(TRANSFORM ACPP_TARGETS REPLACE "amd" "hip") + # remove CPU and Intel GPU target architectures since they are not supported when using AdaptiveCpp + if (DEFINED PLSSVM_CPU_TARGET_ARCHS AND PLSSVM_NUM_CPU_TARGET_ARCHS GREATER 0) + string(REPLACE ";" "," PLSSVM_CPU_TARGET_ARCHS_COMMA "${PLSSVM_CPU_TARGET_ARCHS}") + string(REPLACE ":${PLSSVM_CPU_TARGET_ARCHS_COMMA}" "" ACPP_TARGETS "${ACPP_TARGETS}") + endif () + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + message(FATAL_ERROR "Intel GPUs not supported with AdaptiveCpp's old compilation flow.") + endif () +endif () +# propagate ACPP_TARGETS +target_compile_definitions(${PLSSVM_STDPAR_BACKEND_LIBRARY_INTERFACE} INTERFACE PLSSVM_ACPP_TARGETS="${ACPP_TARGETS}") + # add stdpar 
implementation specific source file to library append_local_and_parent(PLSSVM_STDPAR_SOURCES ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp) append_local_and_parent(PLSSVM_STDPAR_SOURCES ${CMAKE_CURRENT_LIST_DIR}/../../SYCL/AdaptiveCpp/detail/utility.cpp) # set global AdaptiveCpp compiler flags -set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG --acpp-stdpar --acpp-stdpar-unconditional-offload) +set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG --acpp-stdpar --acpp-targets=\"${ACPP_TARGETS}\" --acpp-stdpar-unconditional-offload) # add flag improving CPU performance if only the CPU target is present if (DEFINED PLSSVM_CPU_TARGET_ARCHS AND NOT (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS OR DEFINED PLSSVM_AMD_TARGET_ARCHS OR DEFINED PLSSVM_INTEL_TARGET_ARCHS)) set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} --acpp-stdpar-system-usm) @@ -55,3 +86,8 @@ target_link_libraries(${PLSSVM_STDPAR_BACKEND_LIBRARY_INTERFACE} INTERFACE TBB:: # set AdaptiveCpp compile definition target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_STDPAR_BACKEND_HAS_ACPP) target_compile_definitions(${PLSSVM_STDPAR_BACKEND_LIBRARY_INTERFACE} INTERFACE PLSSVM_STDPAR_BACKEND_HAS_ACPP) + +if (PLSSVM_STDPAR_BACKEND_ACPP_USE_GENERIC_SSCP) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_STDPAR_BACKEND_ACPP_USE_GENERIC_SSCP) + target_compile_definitions(${PLSSVM_STDPAR_BACKEND_LIBRARY_INTERFACE} INTERFACE PLSSVM_STDPAR_BACKEND_ACPP_USE_GENERIC_SSCP) +endif () diff --git a/src/plssvm/backends/stdpar/AdaptiveCpp/csvm.cpp b/src/plssvm/backends/stdpar/AdaptiveCpp/csvm.cpp index 19e09736b..8e15a2d18 100644 --- a/src/plssvm/backends/stdpar/AdaptiveCpp/csvm.cpp +++ b/src/plssvm/backends/stdpar/AdaptiveCpp/csvm.cpp @@ -12,8 +12,10 @@ #include "plssvm/backends/stdpar/detail/utility.hpp" // plssvm::stdpar::detail::{get_stdpar_version, default_device_equals_target} #include "plssvm/backends/stdpar/exceptions.hpp" // 
plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/information.hpp" // plssvm::mpi::detail::gather_and_print_csvm_information #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -50,18 +52,13 @@ csvm::csvm(const target_platform target) { break; } + // update the target platform if (target == target_platform::automatic) { target_ = determine_default_target_platform(); } else { target_ = target; } - plssvm::detail::log(verbosity_level::full, - "\nUsing stdpar ({}; {}) as backend.\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); - // AdaptiveCpp's stdpar per default uses the sycl default device const ::sycl::device default_device{}; if (!detail::default_device_equals_target(default_device, target_)) { @@ -70,18 +67,36 @@ csvm::csvm(const target_platform target) { target_) }; } - // print found stdpar devices - plssvm::detail::log(verbosity_level::full, - "Found {} stdpar device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() }, - plssvm::detail::tracking::tracking_entry{ 
"backend", "target_platform", target_ }); + const std::vector device_names{ default_device.get_info<::sycl::info::device::name>() }; + + if (comm_.size() > 1) { + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::stdpar, target_, device_names, fmt::format("{}", this->get_implementation_type())); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing stdpar ({}; {}; {}) as backend.\n" + "Found {} stdpar device(s) for the target platform {}:\n" + " [0, {}]\n", + this->get_implementation_type(), + detail::get_stdpar_version(), + PLSSVM_ACPP_TARGETS, + this->num_available_devices(), + target_, + device_names.front()); + } - const std::string device_name = default_device.get_info<::sycl::info::device::name>(); - plssvm::detail::log(verbosity_level::full, " [0, {}]\n", device_name); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_name })); + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names.front() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "acpp_targets", PLSSVM_ACPP_TARGETS })); } implementation_type csvm::get_implementation_type() const noexcept { diff --git a/src/plssvm/backends/stdpar/GNU_TBB/csvm.cpp b/src/plssvm/backends/stdpar/GNU_TBB/csvm.cpp index 84fcd9b14..35a3e7f58 100644 --- a/src/plssvm/backends/stdpar/GNU_TBB/csvm.cpp +++ b/src/plssvm/backends/stdpar/GNU_TBB/csvm.cpp @@ -12,7 +12,8 @@ #include "plssvm/backends/stdpar/detail/utility.hpp" // plssvm::stdpar::detail::{get_stdpar_version, default_device_equals_target} #include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -31,6 +32,7 @@ csvm::csvm(const target_platform target) { throw backend_exception{ "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" 
}; #endif + // update the target platform if (target == target_platform::automatic) { // GNU TBB only runs on the CPU target_ = target_platform::cpu; @@ -38,20 +40,29 @@ csvm::csvm(const target_platform target) { target_ = target; } - plssvm::detail::log(verbosity_level::full, - "\nUsing stdpar ({}; {}) as backend.\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + if (comm_.size() > 1) { + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::stdpar, target_, fmt::format("{}", this->get_implementation_type())); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing stdpar ({}; {}) as backend.\n" + "Found {} stdpar device(s) for the target platform {}:\n", + this->get_implementation_type(), + detail::get_stdpar_version(), + this->num_available_devices(), + target_); + } - // print found stdpar devices - plssvm::detail::log(verbosity_level::full, - "Found {} stdpar device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() })); + 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); } implementation_type csvm::get_implementation_type() const noexcept { diff --git a/src/plssvm/backends/stdpar/IntelLLVM/CMakeLists.txt b/src/plssvm/backends/stdpar/IntelLLVM/CMakeLists.txt index f30a26494..1c236a357 100644 --- a/src/plssvm/backends/stdpar/IntelLLVM/CMakeLists.txt +++ b/src/plssvm/backends/stdpar/IntelLLVM/CMakeLists.txt @@ -25,7 +25,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") if (PLSSVM_ENABLE_STDPAR_BACKEND MATCHES "ON") message(SEND_ERROR "Found requested stdpar backend using IntelLLVM, but both \"cpu\" and at least one GPU target was specified!") else () - message(STATUS "Found stdpar backend using NVHPC, but both \"cpu\" and at least one GPU target was specified!") + message(STATUS "Found stdpar backend using IntelLLVM, but both \"cpu\" and at least one GPU target was specified!") endif () message(CHECK_FAIL "skipped") return() @@ -45,42 +45,20 @@ append_local_and_parent(PLSSVM_STDPAR_SOURCES ${CMAKE_CURRENT_LIST_DIR}/../../SY # set global IntelLLVM compiler flags if (DEFINED PLSSVM_CPU_TARGET_ARCHS) message(STATUS "CPU target platform provided, setting: \"-fsycl-pstl-offload=cpu\"") - set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG -fsycl -fsycl-pstl-offload=cpu -fsycl-targets=spir64_x86_64) - if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) - set_local_and_parent( - 
PLSSVM_STDPAR_BACKEND_COMPILER_FLAG ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}" - ) - endif () + set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG -fsycl -fsycl-pstl-offload=cpu) else () message(STATUS "A GPU target platform provided, setting: \"-fsycl-pstl-offload=gpu\"") set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG -fsycl -fsycl-pstl-offload=gpu) - - if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - set_local_and_parent( - PLSSVM_STDPAR_BACKEND_COMPILER_FLAG - ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} - -fsycl-targets=nvptx64-nvidia-cuda - -Xsycl-target-backend=nvptx64-nvidia-cuda - --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS} - ) - elseif (DEFINED PLSSVM_AMD_TARGET_ARCHS) - set_local_and_parent( - PLSSVM_STDPAR_BACKEND_COMPILER_FLAG - ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} - -fsycl-targets=amdgcn-amd-amdhsa - -Xsycl-target-backend=amdgcn-amd-amdhsa - --offload-arch=${PLSSVM_AMD_TARGET_ARCHS} - ) - elseif (DEFINED PLSSVM_INTEL_TARGET_ARCHS) - set_local_and_parent( - PLSSVM_STDPAR_BACKEND_COMPILER_FLAG - ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} - -fsycl-targets=spir64_gen - -Xsycl-target-backend=spir64_gen - "-device ${PLSSVM_INTEL_TARGET_ARCHS}" - ) - endif () endif () + +# assemble the icpx specific compiler flags and add them to the dummy target -> later used to retrieve them +include(${PROJECT_SOURCE_DIR}/cmake/assemble_icpx_sycl_target_flags.cmake) +set(PLSSVM_STDPAR_BACKEND_DUMMY_TARGET plssvm-stdpar-intel_llvm-dummy) +add_library(${PLSSVM_STDPAR_BACKEND_DUMMY_TARGET} INTERFACE) +assemble_icpx_sycl_target_flags(${PLSSVM_STDPAR_BACKEND_DUMMY_TARGET} INTERFACE) +get_target_property(PLSSVM_STDPAR_BACKEND_INTELLLVM_COMPILER_FLAG ${PLSSVM_STDPAR_BACKEND_DUMMY_TARGET} INTERFACE_COMPILE_OPTIONS) +set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} ${PLSSVM_STDPAR_BACKEND_INTELLLVM_COMPILER_FLAG}) + if (PLSSVM_ENABLE_FAST_MATH) 
set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} -ffast-math) else () diff --git a/src/plssvm/backends/stdpar/IntelLLVM/csvm.cpp b/src/plssvm/backends/stdpar/IntelLLVM/csvm.cpp index 8fb4d3fed..0ec757d8a 100644 --- a/src/plssvm/backends/stdpar/IntelLLVM/csvm.cpp +++ b/src/plssvm/backends/stdpar/IntelLLVM/csvm.cpp @@ -12,7 +12,8 @@ #include "plssvm/backends/stdpar/detail/utility.hpp" // plssvm::stdpar::detail::{get_stdpar_version, default_device_equals_target} #include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -50,19 +51,14 @@ csvm::csvm(const target_platform target) { break; } + // update the target platform if (target == target_platform::automatic) { target_ = determine_default_target_platform(); } else { target_ = target; } - plssvm::detail::log(verbosity_level::full, - "\nUsing stdpar ({}; {}) as backend.\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); - - // AdaptiveCpp's stdpar per default uses the sycl default device + // IntelLLVM 
stdpar per default uses the sycl default device const ::sycl::device default_device{}; if (!detail::default_device_equals_target(default_device, target_)) { throw backend_exception{ fmt::format("The default device {} doesn't match the requested target platform {}! Please set the environment variable ONEAPI_DEVICE_SELECTOR or change the target platform.", @@ -70,18 +66,34 @@ csvm::csvm(const target_platform target) { target_) }; } - // print found stdpar devices - plssvm::detail::log(verbosity_level::full, - "Found {} stdpar device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); + const std::vector device_names{ default_device.get_info<::sycl::info::device::name>() }; + + if (comm_.size() > 1) { + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::stdpar, target_, device_names, fmt::format("{}", this->get_implementation_type())); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing stdpar ({}; {}) as backend.\n" + "Found {} stdpar device(s) for the target platform {}:\n" + " [0, {}]\n", + this->get_implementation_type(), + detail::get_stdpar_version(), + this->num_available_devices(), + target_, + device_names.front()); + } - const std::string device_name = default_device.get_info<::sycl::info::device::name>(); - plssvm::detail::log(verbosity_level::full, " [0, {}]\n", device_name); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_name })); + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + 
PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names.front() })); } implementation_type csvm::get_implementation_type() const noexcept { diff --git a/src/plssvm/backends/stdpar/NVHPC/csvm.cpp b/src/plssvm/backends/stdpar/NVHPC/csvm.cpp index f91b1465a..da8286d27 100644 --- a/src/plssvm/backends/stdpar/NVHPC/csvm.cpp +++ b/src/plssvm/backends/stdpar/NVHPC/csvm.cpp @@ -12,7 +12,8 @@ #include "plssvm/backends/stdpar/detail/utility.hpp" // plssvm::stdpar::detail::get_stdpar_version #include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY #include 
"plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -50,37 +51,57 @@ csvm::csvm(const target_platform target) { break; } + // update the target platform if (target == target_platform::automatic) { target_ = determine_default_target_platform(); } else { target_ = target; } - plssvm::detail::log(verbosity_level::full, - "\nUsing stdpar ({}; {}) as backend.\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() }, - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); - - // print found stdpar devices - plssvm::detail::log(verbosity_level::full, - "Found {} stdpar device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); + std::vector device_names{}; + if (comm_.size() > 1) { +#if defined(PLSSVM_STDPAR_BACKEND_NVHPC_GPU) + cudaDeviceProp prop{}; + cudaGetDeviceProperties(&prop, 0); + device_names.emplace_back(prop.name); +#endif + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::stdpar, target_, device_names, fmt::format("{}", this->get_implementation_type())); + } else { + // use more detailed single rank command line output + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing stdpar ({}; {}) as backend.\n" + "Found {} stdpar device(s) for the target platform {}:\n", + this->get_implementation_type(), + detail::get_stdpar_version(), + this->num_available_devices(), + target_); #if defined(PLSSVM_STDPAR_BACKEND_NVHPC_GPU) - cudaDeviceProp prop{}; - cudaGetDeviceProperties(&prop, 0); - 
plssvm::detail::log(verbosity_level::full, - " [0, {}, {}.{}]\n", - prop.name, - prop.major, - prop.minor); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", prop.name })); + cudaDeviceProp prop{}; + cudaGetDeviceProperties(&prop, 0); + device_names.emplace_back(prop.name); + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + " [0, {}, {}.{}]\n", + prop.name, + prop.major, + prop.minor); #endif + } + + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); + if (!device_names.empty()) { + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names.front() })); + } } implementation_type csvm::get_implementation_type() const noexcept { diff --git a/src/plssvm/backends/stdpar/csvm.cpp b/src/plssvm/backends/stdpar/csvm.cpp index 5d61f3181..9ac819664 100644 --- a/src/plssvm/backends/stdpar/csvm.cpp +++ b/src/plssvm/backends/stdpar/csvm.cpp @@ -14,7 +14,7 @@ 
#include "plssvm/backends/stdpar/kernel/predict_kernel.hpp" // plssvm::stdpar::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution} +#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::{move_only_any, move_only_any_cast} #include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable} @@ -54,47 +54,60 @@ std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1); + // update the data distribution: only the upper triangular kernel matrix is used + // note: account for the dimensional reduction + data_distribution_ = std::make_unique<::plssvm::detail::triangular_data_distribution>(comm_, A.num_rows() - 1, this->num_available_devices()); + // get the triangular data distribution + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices()); const real_type cost = real_type{ 1.0 } / params.cost; - switch (solver) { - case solver_type::automatic: - // unreachable - break; - case solver_type::cg_explicit: - { - const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() }; - std::vector 
kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix - switch (params.kernel_type) { - case kernel_function_type::linear: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost); - break; - case kernel_function_type::polynomial: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_assembly(q_red, kernel_matrix, A, QA_cost, cost, std::get(params.gamma)); - break; - } + if (dist.place_specific_num_rows(0) > std::size_t{ 0 }) { + switch (solver) { + case solver_type::automatic: + // unreachable + break; + case solver_type::cg_explicit: + { + // calculate the number of data points this device is responsible for + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); - kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) }; - } - break; - case solver_type::cg_implicit: - { - // simply return data since in implicit we don't assembly the kernel matrix here! 
- kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) }; - } - break; + // get the offset of the data points this device is responsible for + const std::size_t row_offset = dist.place_row_offset(0); + + std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix + switch (params.kernel_type) { + case kernel_function_type::linear: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost); + break; + case kernel_function_type::polynomial: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_assembly(kernel_matrix, A, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); + break; + } + + kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) }; + } + break; + case solver_type::cg_implicit: + { + // simply return data since in implicit we don't assembly the kernel matrix here! 
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) }; + } + break; + } } return kernel_matrices_parts; @@ -110,50 +123,79 @@ void csvm::blas_level_3(const solver_type solver, const real_type alpha, const s PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape()); PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding()); - switch (solver) { - case solver_type::automatic: - // unreachable - break; - case solver_type::cg_explicit: - { - const std::size_t num_rhs = B.shape().x; - const std::size_t num_rows = B.shape().y; + using namespace operators; - const auto &explicit_A = ::plssvm::detail::move_only_any_cast &>(A.front()); - PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!"); + // get the triangular data distribution + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); - detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C); - } - break; - case solver_type::cg_implicit: - { - const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast, parameter, std::vector, real_type> &>(A.front()); - PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!"); - PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); - const real_type cost = real_type{ 1.0 } / params.cost; - - switch (params.kernel_type) { - case kernel_function_type::linear: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C); - break; - case kernel_function_type::polynomial: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - 
detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; - case kernel_function_type::chi_squared: - detail::device_kernel_assembly_symm(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get(params.gamma)); - break; + // check whether the current device is responsible for at least one data point! + if (dist.place_specific_num_rows(0) > std::size_t{ 0 }) { + if (!comm_.is_main_rank()) { + // MPI rank 0 always touches all values in C -> other MPI ranks do not need C + C *= real_type{ 0.0 }; + } + + // calculate the number of data points this device is responsible for + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + // get the offset of the data points this device is responsible for + const std::size_t row_offset = dist.place_row_offset(0); + + const std::size_t num_rhs = B.shape().x; + const std::size_t num_rows = B.shape().y; + + switch (solver) { + case solver_type::automatic: + // unreachable + break; + case solver_type::cg_explicit: + { + const auto &explicit_A = ::plssvm::detail::move_only_any_cast &>(A.front()); + PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!"); + + detail::device_kernel_symm(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, explicit_A, B, beta, C); + + const std::size_t num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + if (num_mirror_rows > std::size_t{ 0 }) { + detail::device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, explicit_A, B, beta, C); + } } - } - break; + break; + case solver_type::cg_implicit: + { 
+ const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast, parameter, std::vector, real_type> &>(A.front()); + PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!"); + PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!"); + const real_type cost = real_type{ 1.0 } / params.cost; + + if (comm_.is_main_rank()) { + // we do not perform the beta scale in C in the cg_implicit device kernel + // -> calculate it using a separate kernel (always on device 0 and MPI rank 0!) + C *= beta; + } + + switch (params.kernel_type) { + case kernel_function_type::linear: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C); + break; + case kernel_function_type::polynomial: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_assembly_symm(alpha, q_red, matr_A, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); + break; + } + } + break; + } } } @@ -182,6 +224,7 @@ aos_matrix csvm::predict_values(const parameter ¶ms, // defined sizes const std::size_t num_classes = alpha.num_rows(); + const std::size_t num_sv = support_vectors.num_rows(); const std::size_t 
num_predict_points = predict_points.num_rows(); const std::size_t num_features = predict_points.num_cols(); @@ -191,33 +234,51 @@ aos_matrix csvm::predict_values(const parameter ¶ms, if (params.kernel_type == kernel_function_type::linear) { // special optimization for the linear kernel function if (w.empty()) { + // update the data distribution to account for the support vectors + data_distribution_ = std::make_unique<::plssvm::detail::rectangular_data_distribution>(comm_, num_sv, this->num_available_devices()); + // fill w vector w = soa_matrix{ plssvm::shape{ num_classes, num_features }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } }; - detail::device_kernel_w_linear(w, alpha, support_vectors); + + if (data_distribution_->place_specific_num_rows(0) > std::size_t{ 0 }) { + const std::size_t device_specific_num_sv = data_distribution_->place_specific_num_rows(0); + const std::size_t sv_offset = data_distribution_->place_row_offset(0); + + detail::device_kernel_w_linear(w, alpha, support_vectors, device_specific_num_sv, sv_offset); + } + + // reduce w on all MPI ranks + comm_.allreduce_inplace(w); } } - // call the predict kernels - switch (params.kernel_type) { - case kernel_function_type::linear: - // predict the values using the w vector - detail::device_kernel_predict_linear(out, w, rho, predict_points); - break; - case kernel_function_type::polynomial: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, params.degree, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::rbf: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; - case kernel_function_type::sigmoid: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma), params.coef0); - break; - case kernel_function_type::laplacian: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; - 
case kernel_function_type::chi_squared: - detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, std::get(params.gamma)); - break; + data_distribution_ = std::make_unique<::plssvm::detail::rectangular_data_distribution>(comm_, num_predict_points, this->num_available_devices()); + const std::size_t device_specific_num_predict_points = data_distribution_->place_specific_num_rows(0); + const std::size_t row_offset = data_distribution_->place_row_offset(0); + + if (data_distribution_->place_specific_num_rows(0) > std::size_t{ 0 }) { + // call the predict kernels + switch (params.kernel_type) { + case kernel_function_type::linear: + // predict the values using the w vector + detail::device_kernel_predict_linear(out, w, rho, predict_points, device_specific_num_predict_points, row_offset); + break; + case kernel_function_type::polynomial: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, params.degree, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::rbf: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + case kernel_function_type::sigmoid: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma), params.coef0); + break; + case kernel_function_type::laplacian: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + case kernel_function_type::chi_squared: + detail::device_kernel_predict(out, alpha, rho, support_vectors, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); + break; + } } return out; diff --git a/src/plssvm/backends/stdpar/detail/utility.cpp 
b/src/plssvm/backends/stdpar/detail/utility.cpp index 1c5cea958..236881ef2 100644 --- a/src/plssvm/backends/stdpar/detail/utility.cpp +++ b/src/plssvm/backends/stdpar/detail/utility.cpp @@ -65,6 +65,8 @@ std::string get_stdpar_version() { return fmt::format("{}", __VERSION__); #elif defined(PLSSVM_STDPAR_BACKEND_HAS_GNU_TBB) return fmt::format("{}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); +#elif defined(PLSSVM_STDPAR_BACKEND_HAS_HIPSTDPAR) + return fmt::format("{}.{}.{}", __clang_major__, __clang_minor__, __clang_patchlevel__); #else return "unknown"; #endif diff --git a/src/plssvm/backends/stdpar/roc-stdpar/CMakeLists.txt b/src/plssvm/backends/stdpar/roc-stdpar/CMakeLists.txt index 0becdb54d..4d93cceb0 100644 --- a/src/plssvm/backends/stdpar/roc-stdpar/CMakeLists.txt +++ b/src/plssvm/backends/stdpar/roc-stdpar/CMakeLists.txt @@ -40,10 +40,33 @@ endif () append_local_and_parent(PLSSVM_STDPAR_SOURCES ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp) # set global roc-stdpar compiler flags -set_local_and_parent( - PLSSVM_STDPAR_BACKEND_COMPILER_FLAG --hipstdpar --offload-arch=${PLSSVM_AMD_TARGET_ARCHS} +set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG --hipstdpar --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) + +# be able to set --hipstdpar-interpose-alloc if necessary +set(PLSSVM_STDPAR_BACKEND_ROCSTDPAR_USE_INTERPOSE_ALLOC AUTO CACHE STRING "Use the --hipstdpar-interpose-alloc compiler flag") +set_property(CACHE PLSSVM_STDPAR_BACKEND_ROCSTDPAR_USE_INTERPOSE_ALLOC PROPERTY STRINGS AUTO ON OFF) +# check whether HSA_XNACK is enabled via an environment variable +set(PLSSVM_IS_HSA_XNACK_DISABLED OFF) +if (NOT DEFINED ENV{HSA_XNACK}) + set(PLSSVM_IS_HSA_XNACK_DISABLED ON) +elseif ($ENV{HSA_XNACK} EQUAL 0) + set(PLSSVM_IS_HSA_XNACK_DISABLED ON) +endif () +# set compiler flag accordingly +if ((PLSSVM_STDPAR_BACKEND_ROCSTDPAR_USE_INTERPOSE_ALLOC MATCHES "AUTO" AND PLSSVM_IS_HSA_XNACK_DISABLED) OR PLSSVM_STDPAR_BACKEND_ROCSTDPAR_USE_INTERPOSE_ALLOC + MATCHES "ON" ) -# 
TODO: The flag "--hipstdpar-interpose-alloc" may be necessary for older AMD GPUs + message(STATUS "Using the --hipstdpar-interpose-alloc compiler flag. " "Note: this may result in the test not being able to run!") + set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG "${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} --hipstdpar-interpose-alloc") + + # the --hipstdpar-interpose-alloc is not supported together with hardware sampling + if (PLSSVM_ENABLE_HARDWARE_SAMPLING) + message( + WARNING + "Hardware sampling is enabled together with the --hipstdpar-interpose-alloc compiler flag. This will most likely result in runtime errors. Either disable hardware sampling or the interpose alloc compiler flag!" + ) + endif () +endif () if (PLSSVM_ENABLE_FAST_MATH) set_local_and_parent(PLSSVM_STDPAR_BACKEND_COMPILER_FLAG ${PLSSVM_STDPAR_BACKEND_COMPILER_FLAG} -ffast-math) diff --git a/src/plssvm/backends/stdpar/roc-stdpar/csvm.cpp b/src/plssvm/backends/stdpar/roc-stdpar/csvm.cpp index 7108f7012..5fcec2460 100644 --- a/src/plssvm/backends/stdpar/roc-stdpar/csvm.cpp +++ b/src/plssvm/backends/stdpar/roc-stdpar/csvm.cpp @@ -9,9 +9,11 @@ #include "plssvm/backends/stdpar/csvm.hpp" #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/stdpar/detail/utility.hpp" // plssvm::stdpar::detail::get_stdpar_version #include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" 
// plssvm::verbosity_level @@ -30,6 +32,7 @@ csvm::csvm(const target_platform target) { throw backend_exception{ "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" }; #endif + // update the target platform if (target == target_platform::automatic) { // roc-stdpar only runs on an AMD GPU target_ = target_platform::gpu_amd; @@ -37,28 +40,44 @@ csvm::csvm(const target_platform target) { target_ = target; } - plssvm::detail::log(verbosity_level::full, - "\nUsing stdpar ({}) as backend.\n\n", - plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + std::vector device_names{}; - // print found stdpar devices - plssvm::detail::log(verbosity_level::full, - "Found {} stdpar device(s) for the target platform {}:\n", - plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() }, - plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); + if (comm_.size() > 1) { + hipDeviceProp_t prop{}; + [[maybe_unused]] hipError_t err = hipGetDeviceProperties(&prop, 0); + device_names.emplace_back(prop.name); + mpi::detail::gather_and_print_csvm_information(comm_, plssvm::backend_type::stdpar, target_, device_names, fmt::format("{}", this->get_implementation_type())); + } else { + // use more detailed single rank command line output + hipDeviceProp_t prop{}; + [[maybe_unused]] hipError_t err = hipGetDeviceProperties(&prop, 0); + device_names.emplace_back(prop.name); + plssvm::detail::log_untracked(verbosity_level::full, + comm_, + "\nUsing stdpar ({}; {}) as backend.\n" + "Found {} stdpar device(s) for the target platform {}:\n" + " [0, {}, {}.{}]\n", + this->get_implementation_type(), + detail::get_stdpar_version(), + this->num_available_devices(), + 
target_, + prop.name, + prop.major, + prop.minor); + } - hipDeviceProp_t prop{}; - hipGetDeviceProperties(&prop, 0); - plssvm::detail::log(verbosity_level::full, - " [0, {}, {}.{}]\n", - prop.name, - prop.major, - prop.minor); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", prop.name })); + plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "\n"); - plssvm::detail::log(verbosity_level::full | verbosity_level::timing, - "\n"); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_implementation", this->get_implementation_type() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "dependencies", "stdpar_version", detail::get_stdpar_version() })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::stdpar })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ })); + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", this->num_available_devices() })); + if (!device_names.empty()) { + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names.front() })); + } } implementation_type csvm::get_implementation_type() const noexcept { diff --git a/src/plssvm/classification_report.cpp b/src/plssvm/classification_report.cpp index e434f71e3..e1de07606 100644 --- a/src/plssvm/classification_report.cpp +++ b/src/plssvm/classification_report.cpp @@ -8,8 +8,9 @@ #include "plssvm/classification_report.hpp" -#include "plssvm/detail/logging.hpp" // plssvm::detail::log -#include 
"plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case +#include "plssvm/detail/logging/log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "fmt/format.h" // fmt::format @@ -33,10 +34,10 @@ double sanitize_nan(const double dividend, const double divisor, const classific // handle the correct zero division behavior switch (zero_div) { case classification_report::zero_division_behavior::warn: - detail::log(verbosity_level::full, - "{} is ill-defined and is set to 0.0 in labels with no predicted samples. " - "Use 'plssvm::classification_report::zero_division' parameter to control this behavior.\n", - metric_name); + detail::log_untracked(verbosity_level::full, + "{} is ill-defined and is set to 0.0 in labels with no predicted samples. " + "Use 'plssvm::classification_report::zero_division' parameter to control this behavior.\n", + metric_name); [[fallthrough]]; case classification_report::zero_division_behavior::zero: return 0.0; @@ -105,11 +106,7 @@ std::ostream &operator<<(std::ostream &out, const classification_report &report) out << '\n'; // print accuracy and average metrics - if (!report.use_micro_average_) { - out << fmt::format("{1:>{2}} {3:>{4}}{5:.{0}f} {6:>7}\n", report.output_digits_, "accuracy", max_label_string_size, "", 2 * report.output_digits_, report.accuracy_.achieved_accuracy, report.accuracy_.num_total); - } else { - out << fmt::format("{1:>{2}} {3:.{0}f} {4:.{0}f} {5:.{0}f} {6:>7}\n", report.output_digits_, "micro avg", max_label_string_size, micro_avg.precision, micro_avg.recall, micro_avg.f1, micro_avg.support); - } + out << fmt::format("{1:>{2}} {3:>{4}}{5:.{0}f} {6:>7}\n", report.output_digits_, "accuracy", max_label_string_size, "", 2 * report.output_digits_, report.accuracy_.achieved_accuracy, report.accuracy_.num_total); out << fmt::format("{1:>{2}} {3:.{0}f} {4:.{0}f} 
{5:.{0}f} {6:>7}\n", report.output_digits_, "macro avg", max_label_string_size, macro_avg.precision, macro_avg.recall, macro_avg.f1, macro_avg.support); out << fmt::format("{1:>{2}} {3:.{0}f} {4:.{0}f} {5:.{0}f} {6:>7}\n\n", report.output_digits_, "weighted avg", max_label_string_size, weighted_avg.precision, weighted_avg.recall, weighted_avg.f1, weighted_avg.support); out << report.accuracy_ << std::endl; diff --git a/src/plssvm/data_set/min_max_scaler.cpp b/src/plssvm/data_set/min_max_scaler.cpp new file mode 100644 index 000000000..a7a897989 --- /dev/null +++ b/src/plssvm/data_set/min_max_scaler.cpp @@ -0,0 +1,67 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/data_set/min_max_scaler.hpp" + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader +#include "plssvm/detail/io/scaling_factors_parsing.hpp" // plssvm::detail::io::parse_scaling_factors +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking_entry +#include "plssvm/exceptions/exceptions.hpp" // plssvm::min_max_scaler_exception +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level + +#include // std::chrono::{time_point, steady_clock, duration_cast, milliseconds} +#include // std::string +#include // std::tie +#include // std::move +#include // std::make_pair + +namespace plssvm { + +min_max_scaler::min_max_scaler(const real_type lower, const real_type upper) : + min_max_scaler{ mpi::communicator{}, lower, upper } { } + +min_max_scaler::min_max_scaler(mpi::communicator 
comm, const real_type lower, const real_type upper) : + scaling_interval_{ std::make_pair(lower, upper) }, + comm_{ std::move(comm) } { + if (lower >= upper) { + throw min_max_scaler_exception{ fmt::format("Inconsistent scaling interval specification: lower ({}) must be less than upper ({})!", lower, upper) }; + } +} + +min_max_scaler::min_max_scaler(const std::string &filename) : + min_max_scaler{ mpi::communicator{}, filename } { } + +min_max_scaler::min_max_scaler(mpi::communicator comm, const std::string &filename) : + comm_{ std::move(comm) } { + // open the file + detail::io::file_reader reader{ filename }; + reader.read_lines('#'); + + // read scaling values from file + std::tie(scaling_interval_, scaling_factors_) = detail::io::parse_scaling_factors(reader); +} + +void min_max_scaler::save(const std::string &filename) const { + const std::chrono::time_point start_time = std::chrono::steady_clock::now(); + + // write scaling values to file + detail::io::write_scaling_factors(filename, scaling_interval_, scaling_factors_); + + const std::chrono::time_point end_time = std::chrono::steady_clock::now(); + detail::log(verbosity_level::full | verbosity_level::timing, + comm_, + "Write {} scaling factors in {} to the file '{}'.\n", + detail::tracking::tracking_entry{ "scaling_factors_write", "num_scaling_factors", scaling_factors_.size() }, + detail::tracking::tracking_entry{ "scaling_factors_write", "time", std::chrono::duration_cast(end_time - start_time) }, + detail::tracking::tracking_entry{ "scaling_factors_write", "filename", filename }); +} + +} // namespace plssvm diff --git a/src/plssvm/detail/cmd/parser_predict.cpp b/src/plssvm/detail/cmd/parser_predict.cpp index 656d9a76d..c90bb62d1 100644 --- a/src/plssvm/detail/cmd/parser_predict.cpp +++ b/src/plssvm/detail/cmd/parser_predict.cpp @@ -8,22 +8,24 @@ #include "plssvm/detail/cmd/parser_predict.hpp" -#include "plssvm/backend_types.hpp" // plssvm::list_available_backends -#include 
"plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::list_available_execution_spaces -#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::list_available_sycl_implementations -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked -#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level -#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info +#include "plssvm/backend_types.hpp" // plssvm::list_available_backends +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::list_available_execution_spaces +#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::list_available_sycl_implementations +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/exceptions/exceptions.hpp" // plssvm::cmd_parser_exit +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level +#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info #include "cxxopts.hpp" // cxxopts::{Options, value, ParseResult} #include "fmt/color.h" // fmt::fg, fmt::color::orange #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include // std::exit, EXIT_SUCCESS, EXIT_FAILURE +#include // EXIT_SUCCESS, EXIT_FAILURE #include // std::exception #include // std::filesystem::path #include // std::cout, std::cerr, std::endl @@ -32,7 +34,7 @@ namespace plssvm::detail::cmd { 
-parser_predict::parser_predict(int argc, char **argv) { +parser_predict::parser_predict(const mpi::communicator &comm, int argc, char **argv) { // check for basic argc and argv correctness PLSSVM_ASSERT(argc >= 1, fmt::format("At least one argument is always given (the executable name), but argc is {}!", argc)); PLSSVM_ASSERT(argv != nullptr, "At least one argument is always given (the executable name), but argv is a nullptr!"); @@ -57,6 +59,9 @@ parser_predict::parser_predict(int argc, char **argv) { #endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) ("performance_tracking", "the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr", cxxopts::value()) +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + ("mpi_load_balancing_weights", "can be used to load balance for MPI (must be integers); number of provided values must match the number of MPI ranks", cxxopts::value()) #endif ("use_strings_as_labels", "use strings as labels instead of plane numbers", cxxopts::value()->default_value(fmt::format("{}", strings_as_labels))) ("verbosity", fmt::format("choose the level of verbosity: full|timing|libsvm|quiet (default: {})", fmt::format("{}", verbosity)), cxxopts::value()) @@ -74,28 +79,36 @@ parser_predict::parser_predict(int argc, char **argv) { options.parse_positional({ "test", "model", "output" }); result = options.parse(argc, argv); } catch (const std::exception &e) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // print help message and exit if (result.count("help")) { - std::cout << options.help() << std::endl; - std::exit(EXIT_SUCCESS); + if (comm.is_main_rank()) { + 
std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_SUCCESS }; } // print version info if (result.count("version")) { - std::cout << version::detail::get_version_info("plssvm-predict") << std::endl; - std::exit(EXIT_SUCCESS); + if (comm.is_main_rank()) { + std::cout << version::detail::get_version_info("plssvm-predict") << std::endl; + } + throw cmd_parser_exit{ EXIT_SUCCESS }; } // check if the number of positional arguments is not too large if (!result.unmatched().empty()) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to three positional options may be given, but {} (\"{}\") additional option(s) where provided!", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to three positional options may be given, but {} (\"{}\") additional option(s) where provided!", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // parse backend_type and cast the value to the respective enum @@ -116,6 +129,7 @@ parser_predict::parser_predict(int argc, char **argv) { // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", sycl_implementation_type); } @@ -134,6 +148,7 @@ parser_predict::parser_predict(int argc, char **argv) { // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend if (!kokkos_backend_is_used && kokkos_execution_space 
!= kokkos::execution_space::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", kokkos_execution_space); } @@ -151,6 +166,7 @@ parser_predict::parser_predict(int argc, char **argv) { const verbosity_level verb = result["verbosity"].as(); if (quiet && verb != verbosity_level::quiet) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set the -q/--quiet flag, but the provided verbosity level isn't \"quiet\"; setting --verbosity={} to --verbosity=quiet\n", verb); verbosity = verbosity_level::quiet; @@ -163,17 +179,21 @@ parser_predict::parser_predict(int argc, char **argv) { // parse test data filename if (!result.count("test")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing test file!\n") << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing test file!\n") << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } input_filename = result["test"].as(); // parse model filename if (!result.count("model")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing model file!\n") << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing model file!\n") << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } model_filename = result["model"].as(); @@ -185,10 +205,28 @@ parser_predict::parser_predict(int argc, char **argv) { predict_filename = input_path.filename().string() + ".predict"; } +#if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) // parse performance tracking filename if 
(result.count("performance_tracking")) { performance_tracking_filename = result["performance_tracking"].as(); } +#endif + +#if defined(PLSSVM_HAS_MPI_ENABLED) + // parse MPI load balancing factors + if (result.count("mpi_load_balancing_weights")) { + mpi_load_balancing_weights = result["mpi_load_balancing_weights"].as(); + + // sanity check provided balance factors + if (mpi_load_balancing_weights.size() != comm.size()) { + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: the number of load balancing weights ({}) must match the number of MPI ranks ({})!\n", mpi_load_balancing_weights.size(), comm.size()) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; + } + } +#endif } std::ostream &operator<<(std::ostream &out, const parser_predict ¶ms) { @@ -221,6 +259,9 @@ std::ostream &operator<<(std::ostream &out, const parser_predict ¶ms) { if (!params.performance_tracking_filename.empty()) { out << fmt::format("performance tracking file: '{}'\n", params.performance_tracking_filename); } + if (!params.mpi_load_balancing_weights.empty()) { + out << fmt::format("mpi load-balancing weights: [{}]\n", fmt::join(params.mpi_load_balancing_weights, ", ")); + } return out; } diff --git a/src/plssvm/detail/cmd/parser_scale.cpp b/src/plssvm/detail/cmd/parser_scale.cpp index 4df557a07..cd03fe5d6 100644 --- a/src/plssvm/detail/cmd/parser_scale.cpp +++ b/src/plssvm/detail/cmd/parser_scale.cpp @@ -8,24 +8,27 @@ #include "plssvm/detail/cmd/parser_scale.hpp" -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level -#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // 
plssvm::detail::log_untracked +#include "plssvm/exceptions/exceptions.hpp" // plssvm::cmd_parser_exit +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::{is_active, finalize} +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level +#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info #include "cxxopts.hpp" // cxxopts::{Options, value, ParseResult} #include "fmt/color.h" // fmt::fg, fmt::color::red #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include // std::exit, EXIT_SUCCESS, EXIT_FAILURE +#include // EXIT_SUCCESS, EXIT_FAILURE #include // std::exception #include // std::cout, std::cerr, std::endl #include // std::is_same_v namespace plssvm::detail::cmd { -parser_scale::parser_scale(int argc, char **argv) { +parser_scale::parser_scale(const mpi::communicator &comm, int argc, char **argv) { // check for basic argc and argv correctness PLSSVM_ASSERT(argc >= 1, fmt::format("At least one argument is always given (the executable name), but argc is {}!", argc)); PLSSVM_ASSERT(argv != nullptr, "At least one argument is always given (the executable name), but argv is a nullptr!"); @@ -63,28 +66,36 @@ parser_scale::parser_scale(int argc, char **argv) { options.parse_positional({ "input", "scaled" }); result = options.parse(argc, argv); } catch (const std::exception &e) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // print help message and exit if (result.count("help")) { - std::cout << options.help() << std::endl; - std::exit(EXIT_SUCCESS); + if (comm.is_main_rank()) { + std::cout << 
options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_SUCCESS }; } // print version info if (result.count("version")) { - std::cout << version::detail::get_version_info("plssvm-scale", false) << std::endl; - std::exit(EXIT_SUCCESS); + if (comm.is_main_rank()) { + std::cout << version::detail::get_version_info("plssvm-scale", false) << std::endl; + } + throw cmd_parser_exit{ EXIT_SUCCESS }; } // check if the number of positional arguments is not too large if (!result.unmatched().empty()) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to two positional options may be given, but {} (\"{}\") additional option(s) where provided!\n", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to two positional options may be given, but {} (\"{}\") additional option(s) where provided!\n", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // parse the lowest allowed value @@ -95,9 +106,11 @@ parser_scale::parser_scale(int argc, char **argv) { // lower must be strictly less than upper! 
if (lower >= upper) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: invalid scaling range [lower, upper] with [{}, {}]!\n", lower, upper) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: invalid scaling range [lower, upper] with [{}, {}]!\n", lower, upper) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // parse the file format @@ -114,6 +127,7 @@ parser_scale::parser_scale(int argc, char **argv) { const verbosity_level verb = result["verbosity"].as(); if (quiet && verb != verbosity_level::quiet) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set the -q/--quiet flag, but the provided verbosity level isn't \"quiet\"; setting --verbosity={} to --verbosity=quiet\n", verb); verbosity = verbosity_level::quiet; @@ -126,9 +140,11 @@ parser_scale::parser_scale(int argc, char **argv) { // parse input data filename if (!result.count("input")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing input file!\n") << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing input file!\n") << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } input_filename = result["input"].as(); @@ -139,9 +155,11 @@ parser_scale::parser_scale(int argc, char **argv) { // can only use one of save_filename or restore_filename if (result.count("save_filename") && result.count("restore_filename")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: cannot use -s (--save_filename) and -r (--restore_filename) simultaneously!\n") << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr 
<< fmt::format(fmt::fg(fmt::color::red), "ERROR: cannot use -s (--save_filename) and -r (--restore_filename) simultaneously!\n") << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // parse the file name to save the calculated weights to @@ -153,15 +171,18 @@ parser_scale::parser_scale(int argc, char **argv) { if (result.count("restore_filename")) { if (result.count("lower") || result.count("upper")) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: provided -l (--lower) and/or -u (--upper) together with -r (--restore_filename); ignoring -l/-u\n"); } restore_filename = result["restore_filename"].as(); } +#if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) // parse performance tracking filename if (result.count("performance_tracking")) { performance_tracking_filename = result["performance_tracking"].as(); } +#endif } std::ostream &operator<<(std::ostream &out, const parser_scale ¶ms) { diff --git a/src/plssvm/detail/cmd/parser_train.cpp b/src/plssvm/detail/cmd/parser_train.cpp index e9f0004b4..fdb0070c9 100644 --- a/src/plssvm/detail/cmd/parser_train.cpp +++ b/src/plssvm/detail/cmd/parser_train.cpp @@ -8,28 +8,31 @@ #include "plssvm/detail/cmd/parser_train.hpp" -#include "plssvm/backend_types.hpp" // plssvm::list_available_backends, plssvm::determine_default_backend -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{list_available_execution_spaces, execution_space} -#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::{list_available_sycl_implementations, implementation_type} -#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/classification_types.hpp" // plssvm::classification_type, plssvm::classification_type_to_full_string -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include 
"plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked -#include "plssvm/detail/utility.hpp" // plssvm::detail::to_underlying -#include "plssvm/gamma.hpp" // plssvm::get_gamma_string -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_type_to_math_string -#include "plssvm/svm_types.hpp" // plssvm::svm_type -#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level -#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info +#include "plssvm/backend_types.hpp" // plssvm::list_available_backends, plssvm::determine_default_backend +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{list_available_execution_spaces, execution_space} +#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::{list_available_sycl_implementations, implementation_type} +#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type +#include "plssvm/classification_types.hpp" // plssvm::classification_type, plssvm::classification_type_to_full_string +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/utility.hpp" // plssvm::detail::to_underlying +#include "plssvm/exceptions/exceptions.hpp" // plssvm::cmd_parser_exit +#include "plssvm/gamma.hpp" // plssvm::get_gamma_string +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_type_to_math_string +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::{is_active, finalize} +#include "plssvm/svm_types.hpp" // plssvm::svm_type +#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms +#include "plssvm/verbosity_levels.hpp" // 
plssvm::verbosity, plssvm::verbosity_level +#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info #include "cxxopts.hpp" // cxxopts::Options, cxxopts::value,cxxopts::ParseResult #include "fmt/color.h" // fmt::fg, fmt::color::red #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include // std::exit, EXIT_SUCCESS, EXIT_FAILURE +#include // EXIT_SUCCESS, EXIT_FAILURE #include // std::exception #include // std::filesystem::path #include // std::cout, std::cerr, std::endl @@ -40,7 +43,7 @@ namespace plssvm::detail::cmd { -parser_train::parser_train(int argc, char **argv) { +parser_train::parser_train(const mpi::communicator &comm, int argc, char **argv) { // check for basic argc and argv correctness PLSSVM_ASSERT(argc >= 1, fmt::format("At least one argument is always given (the executable name), but argc is {}!", argc)); PLSSVM_ASSERT(argv != nullptr, "At least one argument is always given (the executable name), but argv is a nullptr!"); @@ -85,6 +88,9 @@ parser_train::parser_train(int argc, char **argv) { #endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) ("performance_tracking", "the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr", cxxopts::value()) +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + ("mpi_load_balancing_weights", "can be used to load balance for MPI (must be integers); number of provided values must match the number of MPI ranks", cxxopts::value()) #endif ("use_strings_as_labels", "use strings as labels for the classification task instead of plane numbers", cxxopts::value()->default_value(fmt::format("{}", strings_as_labels))) ("verbosity", fmt::format("choose the level of verbosity: full|timing|libsvm|quiet (default: {})", fmt::format("{}", verbosity)), cxxopts::value()) @@ -101,28 +107,36 @@ parser_train::parser_train(int argc, char **argv) { options.parse_positional({ "input", "model" }); result = options.parse(argc, 
argv); } catch (const std::exception &e) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // print help message and exit if (result.count("help")) { - std::cout << options.help() << std::endl; - std::exit(EXIT_SUCCESS); + if (comm.is_main_rank()) { + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_SUCCESS }; } // print version info if (result.count("version")) { - std::cout << version::detail::get_version_info("plssvm-train") << std::endl; - std::exit(EXIT_SUCCESS); + if (comm.is_main_rank()) { + std::cout << version::detail::get_version_info("plssvm-train") << std::endl; + } + throw cmd_parser_exit{ EXIT_SUCCESS }; } // check if the number of positional arguments is not too large if (!result.unmatched().empty()) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to two positional options may be given, but {} (\"{}\") additional option(s) where provided!\n", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to two positional options may be given, but {} (\"{}\") additional option(s) where provided!\n", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // parse svm_type and cast the value to the respective enum @@ -145,9 +159,11 @@ parser_train::parser_train(int argc, char **argv) { const decltype(csvm_params.gamma) gamma_input = result["gamma"].as(); // check if the provided gamma is 
legal iff a real_type has been provided if (std::holds_alternative(gamma_input) && std::get(gamma_input) <= real_type{ 0.0 }) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: gamma must be greater than 0.0, but is {}!\n", std::get(gamma_input)) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: gamma must be greater than 0.0, but is {}!\n", std::get(gamma_input)) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // provided gamma was legal -> override default value csvm_params.gamma = gamma_input; @@ -173,9 +189,11 @@ parser_train::parser_train(int argc, char **argv) { const auto max_iter_input = result["max_iter"].as(); // check if the provided max_iter is legal if (max_iter_input <= decltype(max_iter_input){ 0 }) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: max_iter must be greater than 0, but is {}!\n", max_iter_input) << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: max_iter must be greater than 0, but is {}!\n", max_iter_input) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } // provided max_iter was legal -> override default value max_iter = static_cast(max_iter_input); @@ -188,6 +206,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if a classification type has been provided, but the SVM type is a C-SVR (regression) if (svm == svm_type::csvr) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a classification type but the current svm_type is a C-SVR; ignoring --classification={}\n", classification); } @@ -214,6 +233,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if kernel invocation 
type is explicitly set but SYCL isn't the current (automatic) backend if (!sycl_backend_is_used && sycl_kernel_invocation_type != sycl::kernel_invocation_type::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a SYCL kernel invocation type but the current backend isn't SYCL; ignoring --sycl_kernel_invocation_type={}\n", sycl_kernel_invocation_type); } @@ -224,6 +244,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", sycl_implementation_type); } @@ -242,6 +263,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", kokkos_execution_space); } @@ -252,6 +274,7 @@ parser_train::parser_train(int argc, char **argv) { strings_as_labels = result["use_strings_as_labels"].as(); if (svm != svm_type::csvc && strings_as_labels) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly requested string labels for the regression task; ignoring --use_strings_as_labels\n"); } @@ -263,6 +286,7 @@ parser_train::parser_train(int argc, char **argv) { const verbosity_level verb = result["verbosity"].as(); if (quiet && verb != verbosity_level::quiet) { 
detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set the -q/--quiet flag, but the provided verbosity level isn't \"quiet\"; setting --verbosity={} to --verbosity=quiet\n", verb); verbosity = verbosity_level::quiet; @@ -275,9 +299,11 @@ parser_train::parser_train(int argc, char **argv) { // parse input data filename if (!result.count("input")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing input file!\n") << std::endl; - std::cout << options.help() << std::endl; - std::exit(EXIT_FAILURE); + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing input file!\n") << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; } input_filename = result["input"].as(); @@ -289,10 +315,28 @@ parser_train::parser_train(int argc, char **argv) { model_filename = input_path.filename().string() + ".model"; } +#if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) // parse performance tracking filename if (result.count("performance_tracking")) { performance_tracking_filename = result["performance_tracking"].as(); } +#endif + +#if defined(PLSSVM_HAS_MPI_ENABLED) + // parse MPI load balancing factors + if (result.count("mpi_load_balancing_weights")) { + mpi_load_balancing_weights = result["mpi_load_balancing_weights"].as(); + + // sanity check provided balance factors + if (mpi_load_balancing_weights.size() != comm.size()) { + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: the number of load balancing weights ({}) must match the number of MPI ranks ({})!\n", mpi_load_balancing_weights.size(), comm.size()) << std::endl; + std::cout << options.help() << std::endl; + } + throw cmd_parser_exit{ EXIT_FAILURE }; + } + } +#endif } std::ostream &operator<<(std::ostream &out, const parser_train ¶ms) { @@ -363,9 +407,14 @@ std::ostream &operator<<(std::ostream &out, const parser_train ¶ms) { 
std::is_same_v ? "float" : "double", params.input_filename, params.model_filename); + if (!params.performance_tracking_filename.empty()) { out << fmt::format("performance tracking file: '{}'\n", params.performance_tracking_filename); } + if (!params.mpi_load_balancing_weights.empty()) { + out << fmt::format("mpi load-balancing weights: [{}]\n", fmt::join(params.mpi_load_balancing_weights, ", ")); + } + return out; } diff --git a/src/plssvm/detail/data_distribution.cpp b/src/plssvm/detail/data_distribution.cpp index dc979761e..3f3e42678 100644 --- a/src/plssvm/detail/data_distribution.cpp +++ b/src/plssvm/detail/data_distribution.cpp @@ -11,13 +11,18 @@ #include "plssvm/constants.hpp" // plssvm::PADDING_SIZE #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "fmt/format.h" // fmt::format, fmt::runtime +#include "fmt/base.h" // fmt::runtime +#include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include // std::max, std::fill +#include // std::max #include // std::size_t +#include // std::accumulate, std::gcd, std::exclusive_scan +#include // std::optional #include // std::ostream +#include // std::move #include // std::vector [[nodiscard]] std::size_t calculate_data_set_num_entries(const std::size_t num_data_points, const std::size_t num_features) noexcept { @@ -34,26 +39,53 @@ namespace plssvm::detail { -data_distribution::data_distribution(const std::size_t num_rows, const std::size_t num_places) : - distribution_(num_places + 1), +data_distribution::data_distribution(mpi::communicator comm, const std::size_t num_rows, const std::size_t num_places) : num_rows_{ num_rows }, - num_places_{ num_places } { - PLSSVM_ASSERT(num_rows_ > 0, "At least one row must be present!"); - PLSSVM_ASSERT(num_places_ > 0, "At least one place must be present!"); + num_places_{ num_places }, + comm_{ 
std::move(comm) } { + PLSSVM_ASSERT(num_rows > 0, "At least one row must be present!"); + PLSSVM_ASSERT(num_places > 0, "At least one place must be present!"); + + // gather the number of places from all MPI ranks on all MPI ranks + places_ = comm_.allgather(num_places_); + + // calculate the total number of places + total_num_places_ = std::accumulate(places_.cbegin(), places_.cend(), std::size_t{ 0 }); + // calculate the prefix sum given the places + std::vector offsets(places_.size()); + std::exclusive_scan(places_.cbegin(), places_.cend(), offsets.begin(), std::size_t{ 0 }); + rank_places_offset_ = offsets[comm_.rank()]; + + // check whether there are load balancing weights + const std::optional> weights = comm_.get_load_balancing_weights(); + if (weights.has_value()) { + // get the load balancing weights -> reduce them to reduce the allocation side later + const std::size_t gcd = std::accumulate(weights->cbegin(), weights->cend(), weights->front(), std::gcd); + load_balancing_weights_.resize(weights->size()); + for (std::size_t i = 0; i < load_balancing_weights_.size(); ++i) { + load_balancing_weights_[i] = weights.value()[i] / gcd; + } + } else { + // no load balancing weights -> determine default weights -> equals to the place distribution + load_balancing_weights_ = places_; + } + + // create distribution + distribution_ = std::vector(total_num_places_ + 1); } data_distribution::~data_distribution() = default; std::size_t data_distribution::place_specific_num_rows(const std::size_t place) const noexcept { PLSSVM_ASSERT(distribution_.size() >= 2, "At least one place must be present and, therefore, the distribution vector must contain at least two entries!"); - PLSSVM_ASSERT(place < distribution_.size() - 1, "The queried place can at most be {}, but is {}!", distribution_.size() - 1, place); - return distribution_[place + 1] - distribution_[place]; + PLSSVM_ASSERT(rank_places_offset_ + place < distribution_.size() - 1, "The queried place can at most be {}, 
but is {}!", distribution_.size() - 1, rank_places_offset_ + place); + return distribution_[rank_places_offset_ + place + 1] - distribution_[rank_places_offset_ + place]; } std::size_t data_distribution::place_row_offset(const std::size_t place) const noexcept { PLSSVM_ASSERT(distribution_.size() >= 2, "At least one place must be present and, therefore, the distribution vector must contain at least two entries!"); - PLSSVM_ASSERT(place < distribution_.size() - 1, "The queried place can at most be {}, but is {}!", distribution_.size() - 1, place); - return distribution_[place]; + PLSSVM_ASSERT(rank_places_offset_ + place < distribution_.size() - 1, "The queried place can at most be {}, but is {}!", distribution_.size() - 1, rank_places_offset_ + place); + return distribution_[rank_places_offset_ + place]; } const std::vector &data_distribution::distribution() const noexcept { @@ -64,12 +96,16 @@ std::size_t data_distribution::num_rows() const noexcept { return num_rows_; } +std::size_t data_distribution::total_num_places() const noexcept { + return total_num_places_; +} + std::size_t data_distribution::num_places() const noexcept { return num_places_; } std::ostream &operator<<(std::ostream &out, const data_distribution &dist) { - return out << fmt::format(fmt::runtime("{ num_rows: {}, num_places: {}, dist: [{}] }"), dist.num_rows(), dist.num_places(), fmt::join(dist.distribution(), ", ")); + return out << fmt::format(fmt::runtime("{{ num_rows: {}, total_num_places: {}, dist: [{}] }}"), dist.num_rows(), dist.total_num_places(), fmt::join(dist.distribution(), ", ")); } //*************************************************************************************************************************************// @@ -77,31 +113,37 @@ std::ostream &operator<<(std::ostream &out, const data_distribution &dist) { //*************************************************************************************************************************************// using namespace literals; 
-triangular_data_distribution::triangular_data_distribution(const std::size_t num_rows, const std::size_t num_places) : - data_distribution{ num_rows, num_places } { - // set all distribution values to "num_rows" - std::fill(distribution_.begin(), distribution_.end(), num_rows); - - if (!distribution_.empty()) { // necessary to silence GCC "potential null pointer dereference [-Wnull-dereference]" warning - distribution_.front() = 0; - } +triangular_data_distribution::triangular_data_distribution(mpi::communicator comm, const std::size_t num_rows, const std::size_t num_places) : + data_distribution{ std::move(comm), num_rows, num_places } { + // the triangular distribution function + const auto distribute = [](const std::size_t current_num_rows, const std::size_t offset, const std::size_t current_num_places) { + std::vector result(current_num_places + 1, current_num_rows); + if (!result.empty()) { // necessary to silence GCC "potential null pointer dereference [-Wnull-dereference]" warning + result.front() = 0; + } - // only the upper triangular matrix is important - const std::size_t balanced = (num_rows * (num_rows + 1) / 2) / num_places; + // only the upper triangular matrix is important + const std::size_t balanced = ((current_num_rows * (current_num_rows + 1) / 2) + current_num_rows * offset) / current_num_places; + + std::size_t range_idx = 1; + std::size_t sum = 0; + std::size_t row = 0; + + // the first row has the most data points, while the last row has the fewest + for (std::size_t i = current_num_rows; i >= 1; --i) { + sum += i + offset; + ++row; + if (sum >= balanced) { + result[range_idx++] = row; + sum = 0; + } + } - std::size_t range_idx = 1; - std::size_t sum = 0; - std::size_t row = 0; + return result; + }; - // the first row has the most data points, while the last row has the fewest - for (std::size_t i = num_rows; i >= 1; --i) { - sum += i; - ++row; - if (sum >= balanced) { - distribution_[range_idx++] = row; - sum = 0; - } - } + // update the 
final distribution given the custom distribution function + this->update_distribution(distribute); PLSSVM_ASSERT(std::is_sorted(distribution_.cbegin(), distribution_.cend()), "The distribution must be sorted in an ascending order!"); } @@ -267,25 +309,34 @@ std::vector triangular_data_distribution::calculate_maximum_implici return res; } -rectangular_data_distribution::rectangular_data_distribution(const std::size_t num_rows, const std::size_t num_places) : - data_distribution{ num_rows, num_places } { - // uniform distribution - const std::size_t balanced = num_rows / num_places; - for (std::size_t device_id = 0; device_id < num_places; ++device_id) { - distribution_[device_id] = balanced * device_id; - } +rectangular_data_distribution::rectangular_data_distribution(mpi::communicator comm, const std::size_t num_rows, const std::size_t num_places) : + data_distribution{ std::move(comm), num_rows, num_places } { + // the uniform distribution function + const auto distribute = [](const std::size_t current_num_rows, const std::size_t, const std::size_t current_num_places) { + std::vector result(current_num_places + 1); - // fill remaining values into distribution starting at device 0 - const std::size_t remaining = num_rows - num_places * balanced; - std::size_t running = 0; - for (std::size_t device_id = 1; device_id <= num_places; ++device_id) { - distribution_[device_id] += running; - if (device_id - 1 < remaining) { - distribution_[device_id] += 1; - ++running; + const std::size_t balanced = current_num_rows / current_num_places; + for (std::size_t device_id = 0; device_id < current_num_places; ++device_id) { + result[device_id] = balanced * device_id; } - } - distribution_.back() = num_rows; + + // fill remaining values into distribution starting at device 0 + const std::size_t remaining = current_num_rows - (current_num_places * balanced); + std::size_t running = 0; + for (std::size_t device_id = 1; device_id <= current_num_places; ++device_id) { + 
result[device_id] += running; + if (device_id - 1 < remaining) { + result[device_id] += 1; + ++running; + } + } + result.back() = current_num_rows; + + return result; + }; + + // update the final distribution given the custom distribution function + this->update_distribution(distribute); PLSSVM_ASSERT(std::is_sorted(distribution_.cbegin(), distribution_.cend()), "The distribution must be sorted in an ascending order!"); } diff --git a/src/plssvm/detail/string_conversion.cpp b/src/plssvm/detail/fast_float_wrapper.cpp similarity index 76% rename from src/plssvm/detail/string_conversion.cpp rename to src/plssvm/detail/fast_float_wrapper.cpp index 0fa99877d..a771b5f50 100644 --- a/src/plssvm/detail/string_conversion.cpp +++ b/src/plssvm/detail/fast_float_wrapper.cpp @@ -6,17 +6,15 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "plssvm/detail/string_conversion.hpp" - -#include "plssvm/detail/string_utility.hpp" // plssvm::detail::trim_left -#include "plssvm/detail/type_traits.hpp" // plssvm::remove_cvref_t +#include "plssvm/detail/fast_float_wrapper.hpp" #include "fast_float/fast_float.h" // fast_float::from_chars_result, fast_float::from_chars +#include // std::min #include // std::stold, std::string -#include // std::string_view +#include // std::string_view, std::string_view::size_type #include // std::errc -#include // std::is_same_v, std::is_floating_point_v +#include // std::is_same_v, std::is_floating_point_v, std::remove_reference_t, std::remove_const_t #include // std::pair, std::make_pair namespace plssvm::detail { @@ -24,11 +22,12 @@ namespace plssvm::detail { template std::pair convert_to_floating_point(const std::string_view str) { static_assert(std::is_floating_point_v, "'convert_to_floating_point' may only be called with template types 'T' which are floating points!"); - if constexpr (std::is_same_v, long double>) { + if constexpr (std::is_same_v>, long double>) { return std::make_pair(std::stold(std::string{ 
str }), std::errc{}); } else { // remove leading whitespaces - const std::string_view trimmed_str = trim_left(str); + const std::string_view::size_type pos = std::min(str.find_first_not_of(" \t\v\r\n\f"), str.size()); + const std::string_view trimmed_str = str.substr(pos); // convert string to value fo type T T val{}; diff --git a/src/plssvm/detail/io/file_reader.cpp b/src/plssvm/detail/io/file_reader.cpp index ed934a816..1973b1c36 100644 --- a/src/plssvm/detail/io/file_reader.cpp +++ b/src/plssvm/detail/io/file_reader.cpp @@ -293,8 +293,8 @@ const char *file_reader::buffer() const noexcept { return file_content_; } -void file_reader::open_memory_mapped_file_unix([[maybe_unused]] const char *filename) { #if defined(PLSSVM_HAS_MEMORY_MAPPING_UNIX) +void file_reader::open_memory_mapped_file_unix([[maybe_unused]] const char *filename) { // open the file file_descriptor_ = ::open(filename, O_RDONLY); @@ -324,13 +324,11 @@ void file_reader::open_memory_mapped_file_unix([[maybe_unused]] const char *file must_unmap_file_ = true; } } -#else - throw file_reader_exception{ "Called open_memory_mapped_file_unix(), but the necessary headers couldn't be found!" }; -#endif } +#endif -void file_reader::open_memory_mapped_file_windows([[maybe_unused]] const char *filename) { #if defined(PLSSVM_HAS_MEMORY_MAPPING_WINDOWS) +void file_reader::open_memory_mapped_file_windows([[maybe_unused]] const char *filename) { // open the file file_ = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, nullptr); // check if file could be opened @@ -377,10 +375,8 @@ void file_reader::open_memory_mapped_file_windows([[maybe_unused]] const char *f } } } -#else - throw file_reader_exception{ "Called open_memory_mapped_file_windows(), but the necessary headers couldn't be found!" 
}; -#endif } +#endif void file_reader::open_file(const char *filename) { // open the file diff --git a/src/plssvm/detail/tracking/CMakeLists.txt b/src/plssvm/detail/tracking/CMakeLists.txt index c5a3fdcf3..a535a34c3 100644 --- a/src/plssvm/detail/tracking/CMakeLists.txt +++ b/src/plssvm/detail/tracking/CMakeLists.txt @@ -29,13 +29,38 @@ if (PLSSVM_ENABLE_HARDWARE_SAMPLING) target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_HARDWARE_SAMPLING_INTERVAL=${PLSSVM_HARDWARE_SAMPLING_INTERVAL}ms) include(FetchContent) - set(PLSSVM_hws_VERSION v1.0.3) + set(PLSSVM_hws_VERSION v1.1.1) find_package(hws QUIET) if (hws_FOUND) message(STATUS "Found package hws.") else () - set(PLSSVM_) message(STATUS "Couldn't find package hws. Building version ${PLSSVM_hws_VERSION} from source.") + # always try to sample CPU information + set(HWS_ENABLE_CPU_SAMPLING AUTO CACHE INTERNAL "" FORCE) + # NVIDIA GPUs + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # NVIDIA GPU targets are provided -> try to sample information + set(HWS_ENABLE_GPU_NVIDIA_SAMPLING AUTO CACHE INTERNAL "" FORCE) + else () + # no NVIDIA GPU targets are provided -> don't try to sample information + set(HWS_ENABLE_GPU_NVIDIA_SAMPLING OFF CACHE INTERNAL "" FORCE) + endif () + # AMD GPUs + if (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # AMD GPU targets are provided -> try to sample information + set(HWS_ENABLE_GPU_AMD_SAMPLING AUTO CACHE INTERNAL "" FORCE) + else () + # no AMD GPU targets are provided -> don't try to sample information + set(HWS_ENABLE_GPU_AMD_SAMPLING OFF CACHE INTERNAL "" FORCE) + endif () + # Intel GPUs + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # Intel GPU targets are provided -> try to sample information + set(HWS_ENABLE_GPU_INTEL_SAMPLING AUTO CACHE INTERNAL "" FORCE) + else () + # no Intel GPU targets are provided -> don't try to sample information + set(HWS_ENABLE_GPU_INTEL_SAMPLING OFF CACHE INTERNAL "" FORCE) + endif () set(HWS_ENABLE_ERROR_CHECKS ${PLSSVM_ENABLE_ASSERTS} CACHE INTERNAL "" 
FORCE) set(HWS_SAMPLING_INTERVAL ${PLSSVM_HARDWARE_SAMPLING_INTERVAL} CACHE INTERNAL "" FORCE) set(HWS_ENABLE_PYTHON_BINDINGS OFF CACHE INTERNAL "" FORCE) diff --git a/src/plssvm/detail/tracking/performance_tracker.cpp b/src/plssvm/detail/tracking/performance_tracker.cpp index 6d1323e8e..58b4e975a 100644 --- a/src/plssvm/detail/tracking/performance_tracker.cpp +++ b/src/plssvm/detail/tracking/performance_tracker.cpp @@ -17,6 +17,9 @@ #include "plssvm/detail/string_utility.hpp" // plssvm::detail::replace_all #include "plssvm/detail/utility.hpp" // plssvm::detail::current_date_time, PLSSVM_IS_DEFINED #include "plssvm/gamma.hpp" // plssvm::get_gamma_string +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/detail/utility.hpp" // plssvm::mpi::detail::node_name +#include "plssvm/mpi/detail/version.hpp" // plssvm::mpi::detail::{mpi_library_version, mpi_version} #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/version/git_metadata/git_metadata.hpp" // plssvm::version::git_metadata::commit_sha1 #include "plssvm/version/version.hpp" // plssvm::version::{version, detail::target_platforms} @@ -29,11 +32,12 @@ #include "hws/version.hpp" // hws::version::version #endif -#include "cxxopts.hpp" // CXXOPTS__VERSION_MAJOR, CXXOPTS__VERSION_MINOR, CXXOPTS__VERSION_MINOR -#include "fmt/base.h" // FMT_VERSION -#include "fmt/chrono.h" // format std::chrono types -#include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join +#include "cxxopts.hpp" // CXXOPTS__VERSION_MAJOR, CXXOPTS__VERSION_MINOR, CXXOPTS__VERSION_MINOR +#include "fast_float/float_common.h" // FASTFLOAT_VERSION_MAJOR, FASTFLOAT_VERSION_MINOR, FASTFLOAT_VERSION_PATCH +#include "fmt/base.h" // FMT_VERSION +#include "fmt/chrono.h" // format std::chrono types +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #if __has_include() #include // gethostname, getlogin_r, sysconf, _SC_HOST_NAME_MAX, _SC_LOGIN_NAME_MAX @@ -97,6 
+101,24 @@ void performance_tracker::add_tracking_entry(const tracking_entry &entry) { + // track MPI only if available +#if defined(PLSSVM_HAS_MPI_ENABLED) + // check whether entries should currently be tracked + if (this->is_tracking()) { + // create category + tracking_entries_.emplace(entry.entry_category, std::map>{}); + // fill category with value + tracking_entries_["mpi"].emplace("comm_size", std::vector{ fmt::format("{}", entry.entry_value.size()) }); + tracking_entries_["mpi"].emplace("comm_rank", std::vector{ fmt::format("{}", entry.entry_value.rank()) }); + tracking_entries_["mpi"].emplace("is_main_rank", std::vector{ fmt::format("{}", entry.entry_value.is_main_rank()) }); + tracking_entries_["mpi"].emplace("library_version", std::vector{ fmt::format("\"{}\"", mpi::detail::mpi_library_version()) }); + tracking_entries_["mpi"].emplace("version", std::vector{ fmt::format("\"{}\"", mpi::detail::mpi_version()) }); + tracking_entries_["mpi"].emplace("node_name", std::vector{ fmt::format("\"{}\"", mpi::detail::node_name()) }); + } +#endif +} + void performance_tracker::add_tracking_entry(const tracking_entry &entry) { // check whether entries should currently be tracked if (this->is_tracking()) { @@ -278,35 +300,20 @@ void performance_tracker::save(std::ostream &out) { PADDING_SIZE); #if defined(PLSSVM_SYCL_BACKEND_HAS_DPCPP) - // check whether DPC++ AOT has been enabled - constexpr bool dpcpp_aot = PLSSVM_IS_DEFINED(PLSSVM_SYCL_BACKEND_DPCPP_ENABLE_AOT); - out << fmt::format( - " DPCPP_backend_type: {}\n" - " DPCPP_amd_gpu_backend_type: {}\n" - " DPCPP_with_aot: {}\n", - PLSSVM_SYCL_BACKEND_DPCPP_BACKEND_TYPE, - PLSSVM_SYCL_BACKEND_DPCPP_GPU_AMD_BACKEND_TYPE, - dpcpp_aot); + " DPCPP_backend_type: {}\n", + PLSSVM_SYCL_BACKEND_DPCPP_BACKEND_TYPE); #endif #if defined(PLSSVM_SYCL_BACKEND_HAS_ADAPTIVECPP) // check whether AdaptiveCpp's new SSCP has been enabled constexpr bool adaptivecpp_sscp = PLSSVM_IS_DEFINED(PLSSVM_SYCL_BACKEND_ADAPTIVECPP_USE_GENERIC_SSCP); 
- constexpr bool adaptivecpp_accelerated_cpu = PLSSVM_IS_DEFINED(__HIPSYCL_USE_ACCELERATED_CPU__); + constexpr bool adaptivecpp_accelerated_cpu = PLSSVM_IS_DEFINED(__ACPP_USE_ACCELERATED_CPU__); out << fmt::format( " ADAPTIVECPP_with_generic_SSCP: {}\n" " ADAPTIVECPP_with_accelerated_CPU: {}\n", adaptivecpp_sscp, adaptivecpp_accelerated_cpu); -#endif -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) - // check whether Kokkos::SYCL AOT has been enabled - constexpr bool kokkos_sycl_aot = PLSSVM_IS_DEFINED(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT); - - out << fmt::format( - " KOKKOS_sycl_intel_llvm_with_aot: {}\n", - kokkos_sycl_aot); #endif out << "\n"; @@ -321,11 +328,7 @@ void performance_tracker::save(std::ostream &out) { constexpr int fmt_version_patch = FMT_VERSION % 10; const std::string fmt_version{ fmt::format("{}.{}.{}", fmt_version_major, fmt_version_minor, fmt_version_patch) }; // fast float version -#if defined(PLSSVM_fast_float_VERSION) - const std::string fast_float_version{ PLSSVM_fast_float_VERSION }; -#else - const std::string fast_float_version{ "unknown/external" }; -#endif + const std::string fast_float_version{ fmt::format("{}.{}.{}", FASTFLOAT_VERSION_MAJOR, FASTFLOAT_VERSION_MINOR, FASTFLOAT_VERSION_PATCH) }; // igor version #if defined(PLSSVM_igor_VERSION) const std::string igor_version{ PLSSVM_igor_VERSION }; diff --git a/src/plssvm/exceptions/exceptions.cpp b/src/plssvm/exceptions/exceptions.cpp index 4fd10a27c..3b222d7f6 100644 --- a/src/plssvm/exceptions/exceptions.cpp +++ b/src/plssvm/exceptions/exceptions.cpp @@ -39,6 +39,10 @@ std::string exception::what_with_loc() const { loc_.line()); } +cmd_parser_exit::cmd_parser_exit(const int exit_code, source_location loc) : + exception{ fmt::format("exit code: {}", exit_code), "cmd_parser_exit", loc }, + exit_code_{ exit_code } { } + invalid_parameter_exception::invalid_parameter_exception(const std::string &msg, source_location loc) : exception{ msg, "invalid_parameter_exception", loc } { } @@ -84,4 
+88,7 @@ platform_devices_empty::platform_devices_empty(const std::string &msg, source_lo environment_exception::environment_exception(const std::string &msg, source_location loc) : exception{ msg, "environment_exception", loc } { } +mpi_exception::mpi_exception(const std::string &msg, source_location loc) : + exception{ msg, "mpi_exception", loc } { } + } // namespace plssvm diff --git a/src/plssvm/exceptions/source_location.cpp b/src/plssvm/exceptions/source_location.cpp new file mode 100644 index 000000000..5b75dba27 --- /dev/null +++ b/src/plssvm/exceptions/source_location.cpp @@ -0,0 +1,46 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/exceptions/source_location.hpp" + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_Comm_rank, MPI_COMM_WORLD +#endif +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::is_active + +#include // std::uint_least32_t +#include // std::make_optional + +namespace plssvm { + +source_location source_location::current(const char *file_name, const char *function_name, int line, int column) noexcept { + source_location loc; + + loc.file_name_ = file_name; + loc.function_name_ = function_name; + loc.line_ = static_cast(line); + loc.column_ = static_cast(column); + + // try getting the MPI rank wrt to MPI_COMM_WORLD + try { + if (mpi::is_active()) { + // prevent excessive mpi::communicator constructor calls +#if defined(PLSSVM_HAS_MPI_ENABLED) + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + loc.world_rank_ = std::make_optional(rank); +#endif + } + } catch (...) 
{ + // std::nullopt + } + + return loc; +} + +} // namespace plssvm diff --git a/src/plssvm/mpi/communicator.cpp b/src/plssvm/mpi/communicator.cpp new file mode 100644 index 000000000..d1f92addb --- /dev/null +++ b/src/plssvm/mpi/communicator.cpp @@ -0,0 +1,188 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/mpi/communicator.hpp" + +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/exceptions/exceptions.hpp" // plssvm::mpi_exception +#include "plssvm/mpi/detail/mpi_datatype.hpp" // plssvm::mpi::detail::mpi_datatype +#include "plssvm/mpi/detail/utility.hpp" // PLSSVM_MPI_ERROR_CHECK + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_Comm, MPI_Comm_size, MPI_Comm_rank, MPI_Barrier, MPI_Gatherv, MPI_Gather, MPI_Bcast, MPI_Comm_compare, MPI_IDENT +#endif + +#include "fmt/format.h" // fmt::format + +#include // std::transform +#include // std::chrono::milliseconds +#include // std::size_t +#include // std::int64_t +#include // std::optional, std::nullopt +#include // std::string +#include // std::move +#include // std::vector + +namespace plssvm::mpi { + +communicator::communicator(std::vector weights) { + // set load balancing weights + this->set_load_balancing_weights(std::move(weights)); +} + +#if defined(PLSSVM_HAS_MPI_ENABLED) +communicator::communicator(MPI_Comm comm) : + comm_{ comm }, + load_balancing_weights_{ std::nullopt } { } + +communicator::communicator(MPI_Comm comm, std::vector weights) : + comm_{ comm } { + // set load balancing weights + this->set_load_balancing_weights(std::move(weights)); +} +#endif + +std::size_t communicator::size() const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + int size{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Comm_size(comm_, 
&size)); + return static_cast(size); +#else + return std::size_t{ 1 }; +#endif +} + +std::size_t communicator::rank() const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + int rank{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Comm_rank(comm_, &rank)); + return static_cast(rank); +#else + return std::size_t{ 0 }; +#endif +} + +bool communicator::is_main_rank() const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + return this->rank() == communicator::main_rank(); +#else + return true; +#endif +} + +void communicator::barrier() const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + PLSSVM_MPI_ERROR_CHECK(MPI_Barrier(comm_)); +#endif +} + +std::vector communicator::gather(const std::string &str) const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + // gather string size information + const std::vector sizes = this->gather(static_cast(str.size())); + + // calculate displacements and create receive-buffer (on main rank only!) + std::vector recv_buffer{}; + std::vector displacements(sizes.size()); + if (this->is_main_rank()) { + int total_size{}; + for (std::size_t i = 0; i < sizes.size(); ++i) { + displacements[i] = total_size; + total_size += sizes[i]; + } + recv_buffer.resize(total_size); + } + + // gather the strings on the MPI main rank + PLSSVM_MPI_ERROR_CHECK(MPI_Gatherv(str.data(), str.size(), detail::mpi_datatype(), recv_buffer.data(), sizes.data(), displacements.data(), detail::mpi_datatype(), communicator::main_rank(), comm_)); + + // unpack the receive-buffer to the separate strings + std::vector result(sizes.size()); + if (this->is_main_rank()) { + for (std::size_t i = 0; i < sizes.size(); ++i) { + result[i] = std::string(recv_buffer.begin() + displacements[i], + recv_buffer.begin() + displacements[i] + sizes[i]); + } + } + return result; +#else + return { str }; +#endif +} + +std::vector communicator::gather(const std::chrono::milliseconds &duration) const { +#if defined(PLSSVM_HAS_MPI_ENABLED) + // convert the duration to an integer + const std::int64_t intermediate_dur = duration.count(); + 
std::vector intermediate_result(this->size()); + // gather the integer values from each MPI rank + PLSSVM_MPI_ERROR_CHECK(MPI_Gather(&intermediate_dur, 1, detail::mpi_datatype(), intermediate_result.data(), 1, detail::mpi_datatype(), communicator::main_rank(), comm_)); + // cast integers back to durations + std::vector result(this->size()); + std::transform(intermediate_result.cbegin(), intermediate_result.cend(), result.begin(), [](const std::int64_t dur) { return static_cast(dur); }); + return result; +#else + return { duration }; +#endif +} + +void communicator::set_load_balancing_weights(std::vector weights) { + if (weights.size() != this->size()) { + throw mpi_exception{ fmt::format("The number of load balancing weights ({}) must match the number of MPI ranks ({})!", weights.size(), this->size()) }; + } + load_balancing_weights_ = std::move(weights); +} + +const std::optional> &communicator::get_load_balancing_weights() const noexcept { +#if defined(PLSSVM_ENABLE_ASSERTS) && defined(PLSSVM_HAS_MPI_ENABLED) + // check if all MPI ranks have balancing weights + bool has_weights = load_balancing_weights_.has_value(); + bool and_result{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Allreduce(&has_weights, &and_result, 1, MPI_C_BOOL, MPI_LAND, comm_)); + bool or_result{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Allreduce(&has_weights, &or_result, 1, MPI_C_BOOL, MPI_LOR, comm_)); + + // All ranks are true: Both MPI_LAND and MPI_LOR will return 1. + // All ranks are false: Both MPI_LAND and MPI_LOR will return 0. + // Mixed values: MPI_LAND will return 0, and MPI_LOR will return 1. 
+ // -> if the values are not equal some ranks have load balancing weights and some don't + PLSSVM_ASSERT(and_result == or_result, "Some MPI ranks have load balancing weights and some don't!"); + + // if all MPI ranks have load balancing weights, check that they are the same + if (and_result) { + // check that the balancing weights are the same for all MPI ranks + std::vector reference_weights(load_balancing_weights_->size()); + if (this->is_main_rank()) { + reference_weights = load_balancing_weights_.value(); + } + PLSSVM_MPI_ERROR_CHECK(MPI_Bcast(reference_weights.data(), reference_weights.size(), detail::mpi_datatype(), communicator::main_rank(), comm_)); + // each rank checks whether its array is correct + // if this is not the case for at least one array, abort + PLSSVM_ASSERT(static_cast(reference_weights == load_balancing_weights_.value()), "The load balancing weights must be the same on all MPI ranks which is currently not the case!"); + } +#endif + return load_balancing_weights_; +} + +bool operator==([[maybe_unused]] const communicator &lhs, [[maybe_unused]] const communicator &rhs) noexcept { +#if defined(PLSSVM_HAS_MPI_ENABLED) + // check whether the two MPI communicators are equal, i.e., their comparison result is MPI_IDENT + int result{}; + MPI_Comm_compare(lhs.comm_, rhs.comm_, &result); + return result == MPI_IDENT; +#else + // if no MPI is enabled, two communicators are always equal + return true; +#endif +} + +bool operator!=(const communicator &lhs, const communicator &rhs) noexcept { + return !(lhs == rhs); +} + +} // namespace plssvm::mpi diff --git a/src/plssvm/mpi/detail/information.cpp b/src/plssvm/mpi/detail/information.cpp new file mode 100644 index 000000000..3e30cdb70 --- /dev/null +++ b/src/plssvm/mpi/detail/information.cpp @@ -0,0 +1,117 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is 
released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/mpi/detail/information.hpp" + +#include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/solver_types.hpp" // plssvm::solver_type +#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join + +#include // std::size_t +#include // std::map +#include // std::optional, std::nullopt +#include // std::string +#include // std::vector + +namespace plssvm::mpi::detail { + +void gather_and_print_solver_information(const communicator &comm, solver_type rank_solver) { + // gather the solver information from all MPI ranks on the main MPI rank + const std::vector all_solvers = comm.gather(rank_solver); + + // output information only on the main MPI rank! 
+ if (comm.is_main_rank()) { + // map all MPI ranks to its used solver + std::map> solvers_for_rank{}; + for (std::size_t i = 0; i < all_solvers.size(); ++i) { + solvers_for_rank[all_solvers[i]].push_back(i); + } + + // output the information (again, only on the main MPI rank) + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + "\nThe used solver(s) for AX=B across {} MPI rank(s) are:\n", + comm.size()); + for (const auto &[solver, ranks] : solvers_for_rank) { + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + " - {}: {}\n", + solver, + fmt::join(ranks, ", ")); + } + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + "\n"); + } +} + +void gather_and_print_csvm_information(const communicator &comm, backend_type rank_backend, target_platform rank_target, const std::vector &rank_devices, const std::optional &additional_info) { + // gather the information from all MPI ranks on the main MPI rank + const std::vector backends_per_ranks = comm.gather(rank_backend); + const std::vector targets_per_rank = comm.gather(rank_target); + // pre-process device names + std::map devices_for_rank{}; + for (const std::string &device : rank_devices) { + ++devices_for_rank[device]; + } + // assemble one device name string + std::vector rank_str{}; + for (const auto &[device, count] : devices_for_rank) { + rank_str.emplace_back(fmt::format("{}x {}", count, device)); + } + const std::vector strings_per_rank = comm.gather(fmt::format("{}", fmt::join(rank_str, ", "))); + // get the potentially additional information + const std::vector additional_info_per_rank = comm.gather(additional_info.value_or("")); + + // output the information (again, only on the main MPI rank) + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + "\nThe setup across {} MPI rank(s) is:\n", + comm.size()); + for (std::size_t i = 0; i < comm.size(); ++i) { + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + " - {}: {}{} for {} ({})\n", + i, + 
backends_per_ranks[i], + additional_info_per_rank[i].empty() ? "" : fmt::format(" ({})", additional_info_per_rank[i]), + targets_per_rank[i], + strings_per_rank[i]); + } +} + +void gather_and_print_csvm_information(const communicator &comm, backend_type rank_backend, target_platform rank_target, const std::optional &additional_info) { + // gather the information from all MPI ranks on the main MPI rank + const std::vector backends_per_ranks = comm.gather(rank_backend); + const std::vector targets_per_rank = comm.gather(rank_target); + // get the potentially additional information + const std::vector additional_info_per_rank = comm.gather(additional_info.value_or("")); + + // output the information (again, only on the main MPI rank) + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + "\nThe setup across {} MPI rank(s) is:\n", + comm.size()); + for (std::size_t i = 0; i < comm.size(); ++i) { + ::plssvm::detail::log_untracked(verbosity_level::full, + comm, + " - {}: {}{} for {}\n", + i, + backends_per_ranks[i], + additional_info_per_rank[i].empty() ? "" : fmt::format(" ({})", additional_info_per_rank[i]), + targets_per_rank[i]); + } +} + +} // namespace plssvm::mpi::detail diff --git a/src/plssvm/mpi/detail/utility.cpp b/src/plssvm/mpi/detail/utility.cpp new file mode 100644 index 000000000..d61991d76 --- /dev/null +++ b/src/plssvm/mpi/detail/utility.cpp @@ -0,0 +1,49 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/mpi/detail/utility.hpp" + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::mpi_exception + +#include "fmt/format.h" // fmt::format + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_Get_processor_name +#endif + +#include // std::string + +namespace plssvm::mpi::detail { + +void mpi_error_check([[maybe_unused]] const int err) { +#if defined(PLSSVM_HAS_MPI_ENABLED) + if ((err) != MPI_SUCCESS) { + std::string err_str(MPI_MAX_ERROR_STRING, '\0'); + int err_str_len{}; + const int res = MPI_Error_string(err, err_str.data(), &err_str_len); + if (res == MPI_SUCCESS) { + throw plssvm::mpi_exception{ fmt::format("MPI error {}: {}", err, err_str.substr(0, err_str.find_first_of('\0'))) }; + } else { + throw plssvm::mpi_exception{ fmt::format("MPI error {}", err) }; + } + } +#endif +} + +std::string node_name() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + std::string name(MPI_MAX_PROCESSOR_NAME, '\0'); + int resultlen{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Get_processor_name(name.data(), &resultlen)); + return name.substr(0, name.find_first_of('\0')); +#else + return std::string{ "unknown/unused" }; +#endif +} + +} // namespace plssvm::mpi::detail diff --git a/src/plssvm/mpi/detail/version.cpp b/src/plssvm/mpi/detail/version.cpp new file mode 100644 index 000000000..3e9cb0124 --- /dev/null +++ b/src/plssvm/mpi/detail/version.cpp @@ -0,0 +1,45 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/mpi/detail/version.hpp" + +#include "plssvm/mpi/detail/utility.hpp" // PLSSVM_MPI_ERROR_CHECK + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_Get_library_version, MPI_Get_version +#endif + +#include "fmt/format.h" // fmt::format + +#include // std::string + +namespace plssvm::mpi::detail { + +std::string mpi_library_version() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + std::string version(MPI_MAX_LIBRARY_VERSION_STRING, '\0'); + int resultlen{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Get_library_version(version.data(), &resultlen)); + return version.substr(0, version.find_first_of('\0')); +#else + return std::string{ "unknown/unused" }; +#endif +} + +std::string mpi_version() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + int version{}; + int subversion{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Get_version(&version, &subversion)); + return fmt::format("{}.{}", version, subversion); +#else + return std::string{ "unknown/unused" }; +#endif +} + +} // namespace plssvm::mpi::detail diff --git a/src/plssvm/mpi/environment.cpp b/src/plssvm/mpi/environment.cpp new file mode 100644 index 000000000..205d22f2a --- /dev/null +++ b/src/plssvm/mpi/environment.cpp @@ -0,0 +1,93 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/mpi/environment.hpp" + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::mpi_exception +#include "plssvm/mpi/detail/utility.hpp" // PLSSVM_MPI_ERROR_CHECK + +#include "fmt/format.h" // fmt::format + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_THREAD_FUNNELED, MPI_Init_thread, MPI_Finalize, MPI_Initialized, MPI_Finalized +#endif + +#include // EXIT_FAILURE, std::getenv, std::abort + +namespace plssvm::mpi { + +void init() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + constexpr int required = MPI_THREAD_FUNNELED; + int provided{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Init_thread(nullptr, nullptr, required, &provided)); + if (required < provided) { + throw mpi_exception{ fmt::format("Error: provided thread level {} to small for requested thread level {}!", provided, required) }; + } +#endif +} + +void init([[maybe_unused]] int &argc, [[maybe_unused]] char **argv) { +#if defined(PLSSVM_HAS_MPI_ENABLED) + constexpr int required = MPI_THREAD_FUNNELED; + int provided{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Init_thread(&argc, &argv, required, &provided)); + if (required < provided) { + throw mpi_exception{ fmt::format("Error: provided thread level {} to small for requested thread level {}!", provided, required) }; + } +#endif +} + +void finalize() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + PLSSVM_MPI_ERROR_CHECK(MPI_Finalize()); +#endif +} + +void abort_world() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + PLSSVM_MPI_ERROR_CHECK(MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE)); +#else + std::abort(); +#endif +} + +bool is_initialized() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + int flag{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Initialized(&flag)); + return static_cast(flag); +#else + return true; +#endif +} + +bool is_finalized() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + int flag{}; + PLSSVM_MPI_ERROR_CHECK(MPI_Finalized(&flag)); + return static_cast(flag); +#else + return true; +#endif +} + +bool is_active() { +#if defined(PLSSVM_HAS_MPI_ENABLED) + return 
is_initialized() && !is_finalized(); +#else + return false; +#endif +} + +bool is_executed_via_mpirun() { + return std::getenv("OMPI_COMM_WORLD_SIZE") != nullptr || // OpenMPI + std::getenv("PMI_SIZE") != nullptr; // MPICH, IntelMPI, OpenMPI +} + +} // namespace plssvm::mpi diff --git a/src/plssvm/svm/csvm.cpp b/src/plssvm/svm/csvm.cpp index 72b127183..3f750e088 100644 --- a/src/plssvm/svm/csvm.cpp +++ b/src/plssvm/svm/csvm.cpp @@ -10,12 +10,11 @@ #include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log.hpp" // plssvm::detail::log +#include "plssvm/detail/logging/mpi_log_untracked.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any #include "plssvm/detail/operators.hpp" // plssvm operator overloads for vectors #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT, plssvm::detail::tracking::tracking_entry -#include "plssvm/detail/utility.hpp" // plssvm::detail::to_underlying -#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_parameter_exception #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/kernel_functions.hpp" // plssvm::kernel_function @@ -34,27 +33,11 @@ #include // std::inner_product #include // std::move #include // std::pair, std::make_pair -#include // std::holds_alternative, std::get +#include // std::get #include // std::vector namespace plssvm { -void csvm::sanity_check_parameter() const { - // kernel: valid kernel function - const auto kernel_type_value = detail::to_underlying(params_.kernel_type); - if (kernel_type_value < 0 || kernel_type_value >= 6) { - throw invalid_parameter_exception{ 
fmt::format("Invalid kernel function with value {} given!", kernel_type_value) }; - } - - // gamma: must be greater than 0 IF explicitly provided as real_type (not for the linear kernel) - if (params_.kernel_type != kernel_function_type::linear && std::holds_alternative(params_.gamma) && std::get(params_.gamma) <= real_type{ 0.0 }) { - throw invalid_parameter_exception{ fmt::format("gamma must be greater than 0.0, but is {}!", std::get(params_.gamma)) }; - } - // degree: all allowed - // coef0: all allowed - // cost: all allowed -} - std::pair, std::vector> csvm::conjugate_gradients(const std::vector &A, const soa_matrix &B, const real_type eps, const unsigned long long max_cg_iter, const solver_type cg_solver) const { using namespace plssvm::operators; @@ -84,6 +67,8 @@ std::pair, std::vector> csvm::conjugat // R = B - A * X soa_matrix R{ B, shape{ PADDING_SIZE, PADDING_SIZE } }; blas_level_3_times.push_back(this->run_blas_level_3(cg_solver, real_type{ -1.0 }, A, X, real_type{ 1.0 }, R)); + // reduce R matrix on all MPI ranks + comm_.allreduce_inplace(R); // delta = R.T * R std::vector delta = rowwise_dot(R, R); @@ -143,15 +128,16 @@ std::pair, std::vector> csvm::conjugat PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT(fmt::format("cg iter {} start", iter)); const std::size_t max_residual_difference_idx = rhs_idx_max_residual_difference(); - detail::log(verbosity_level::full | verbosity_level::timing, - "Start Iteration {} (max: {}) with {}/{} converged rhs (max residual {} with target residual {} for rhs {}). ", - iter + 1, - max_cg_iter, - num_rhs_converged(), - num_rhs, - delta[max_residual_difference_idx], - eps * eps * delta0[max_residual_difference_idx], - max_residual_difference_idx); + detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "Start Iteration {} (max: {}) with {}/{} converged rhs (max residual {} with target residual {} for rhs {}). 
", + iter + 1, + max_cg_iter, + num_rhs_converged(), + num_rhs, + delta[max_residual_difference_idx], + eps * eps * delta0[max_residual_difference_idx], + max_residual_difference_idx); const std::chrono::steady_clock::time_point iteration_start_time = std::chrono::steady_clock::now(); // create mask for the residual -> only update X if the respective rhs did not already converge @@ -160,6 +146,8 @@ std::pair, std::vector> csvm::conjugat // Q = A * D soa_matrix Q{ shape{ D.num_rows(), D.num_cols() }, shape{ PADDING_SIZE, PADDING_SIZE } }; blas_level_3_times.push_back(this->run_blas_level_3(cg_solver, real_type{ 1.0 }, A, D, real_type{ 0.0 }, Q)); + // reduce Q matrix on all MPI ranks + comm_.allreduce_inplace(Q); // alpha = delta_new / (D^T * Q)) const std::vector alpha = delta / rowwise_dot(D, Q); @@ -172,6 +160,8 @@ std::pair, std::vector> csvm::conjugat // R = B - A * X R = soa_matrix{ B, shape{ PADDING_SIZE, PADDING_SIZE } }; blas_level_3_times.push_back(this->run_blas_level_3(cg_solver, real_type{ -1.0 }, A, X, real_type{ 1.0 }, R)); + // reduce R matrix on all MPI ranks + comm_.allreduce_inplace(R); } else { // R = R - alpha * Q R -= rowwise_scale(alpha, Q); @@ -188,9 +178,10 @@ std::pair, std::vector> csvm::conjugat const std::chrono::steady_clock::time_point iteration_end_time = std::chrono::steady_clock::now(); const std::chrono::duration iteration_duration = std::chrono::duration_cast(iteration_end_time - iteration_start_time); - detail::log(verbosity_level::full | verbosity_level::timing, - "Done in {}.\n", - iteration_duration); + detail::log_untracked(verbosity_level::full | verbosity_level::timing, + comm_, + "Done in {}.\n", + iteration_duration); total_iteration_time += iteration_duration; // next CG iteration @@ -199,6 +190,7 @@ std::pair, std::vector> csvm::conjugat } const std::size_t max_residual_difference_idx = rhs_idx_max_residual_difference(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Finished after {}/{} 
iterations with {}/{} converged rhs (max residual {} with target residual {} for rhs {}) and an average iteration time of {}.\n", detail::tracking::tracking_entry{ "cg", "iterations", iter }, detail::tracking::tracking_entry{ "cg", "max_iterations", max_cg_iter }, @@ -212,9 +204,10 @@ std::pair, std::vector> csvm::conjugat PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "cg", "residuals", delta })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "cg", "target_residuals", eps * eps * delta0 })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "cg", "epsilon", eps })); - detail::log(verbosity_level::libsvm, - "optimization finished, #iter = {}\n", - iter); + detail::log_untracked(verbosity_level::libsvm, + comm_, + "optimization finished, #iter = {}\n", + iter); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_EVENT("cg end"); @@ -271,6 +264,7 @@ std::pair, real_type> csvm::perform_dimensional_reduction const real_type QA_cost = kernel_function(A, num_rows_reduced, A, num_rows_reduced, params) + real_type{ 1.0 } / params.cost; const std::chrono::steady_clock::time_point dimension_reduction_end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Performed dimensional reduction in {}.\n", detail::tracking::tracking_entry{ "cg", "dimensional_reduction", std::chrono::duration_cast(dimension_reduction_end_time - dimension_reduction_start_time) }); @@ -292,10 +286,12 @@ std::chrono::duration csvm::run_blas_level_3(const solver_type aos_matrix csvm::run_predict_values(const parameter ¶ms, const soa_matrix &support_vectors, const aos_matrix &alpha, const std::vector &rho, soa_matrix &w, const soa_matrix &predict_points) const { const std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); - decltype(auto) res = this->predict_values(params, 
support_vectors, alpha, rho, w, predict_points); + aos_matrix res = this->predict_values(params, support_vectors, alpha, rho, w, predict_points); + comm_.allreduce_inplace(res); const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Predicted the values of {} predict points using {} support vectors with {} features each in {}.\n", predict_points.num_rows(), support_vectors.num_rows(), diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b067bea48..e7be2758e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,8 +7,8 @@ list(APPEND CMAKE_MESSAGE_INDENT "Tests: ") # setup testing wit GoogleTest -set(PLSSVM_googletest_VERSION v1.15.2) -find_package(GTest 1.15.2 QUIET) +set(PLSSVM_googletest_VERSION v1.16.0) +find_package(GTest 1.16.0 QUIET) if (GTEST_FOUND) message(STATUS "Found package GTest.") else () @@ -129,6 +129,14 @@ option(PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES "Reduce the number of tested label t if (PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES) message(STATUS "Reducing the number of tested label types.") target_compile_definitions(${PLSSVM_BASE_TEST_LIBRARY_NAME} PUBLIC PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES) +else () + # use all possible label types currently not supported if nvc++ is the CMAKE_CXX_COMPILER + if (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") + message( + FATAL_ERROR + "Full label type test currently not supported with nvc++ as CMAKE_CXX_COMPILER. Please set \"PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES=OFF\" or use another compiler!" 
+ ) + endif () endif () # set necessary include directories @@ -152,6 +160,8 @@ set(PLSSVM_BASE_TEST_NAME Base_tests) set(PLSSVM_BASE_TEST_SOURCES # it is unnecessary to test the execution range for each backend ${CMAKE_CURRENT_LIST_DIR}/backends/execution_range.cpp + # since the Kokkos execution_space enumeration is used even if no Kokkos backend is available this is also tested in the base library + ${CMAKE_CURRENT_LIST_DIR}/backends/Kokkos/execution_space.cpp # since the SYCL implementation_type and kernel_invocation_type enumerations are used even if no SYCL backend is available these are also tested in the base # library ${CMAKE_CURRENT_LIST_DIR}/backends/SYCL/implementation_types.cpp @@ -190,8 +200,10 @@ set(PLSSVM_BASE_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/arithmetic_type_name.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/assert.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/data_distribution.cpp - ${CMAKE_CURRENT_LIST_DIR}/detail/logging.cpp - ${CMAKE_CURRENT_LIST_DIR}/detail/logging_without_performance_tracking.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/logging/log.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/logging/log_untracked.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/logging/mpi_log.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/logging/mpi_log_untracked.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/memory_size.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/move_only_any.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/operators.cpp @@ -203,24 +215,32 @@ set(PLSSVM_BASE_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions/exceptions.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions/source_location.cpp + ${CMAKE_CURRENT_LIST_DIR}/model/classification_model.cpp + ${CMAKE_CURRENT_LIST_DIR}/model/regression_model.cpp + ${CMAKE_CURRENT_LIST_DIR}/mpi/detail/information.cpp + ${CMAKE_CURRENT_LIST_DIR}/mpi/detail/mpi_datatype.cpp + ${CMAKE_CURRENT_LIST_DIR}/mpi/detail/utility.cpp + ${CMAKE_CURRENT_LIST_DIR}/mpi/detail/version.cpp + ${CMAKE_CURRENT_LIST_DIR}/mpi/communicator.cpp + 
${CMAKE_CURRENT_LIST_DIR}/mpi/environment.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvc.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvr.cpp ${CMAKE_CURRENT_LIST_DIR}/version/git_metadata/git_metadata.cpp ${CMAKE_CURRENT_LIST_DIR}/version/version.cpp ${CMAKE_CURRENT_LIST_DIR}/backend_types.cpp - ${CMAKE_CURRENT_LIST_DIR}/classification_model.cpp ${CMAKE_CURRENT_LIST_DIR}/classification_report.cpp ${CMAKE_CURRENT_LIST_DIR}/classification_types.cpp ${CMAKE_CURRENT_LIST_DIR}/csvc_factory.cpp ${CMAKE_CURRENT_LIST_DIR}/csvr_factory.cpp + ${CMAKE_CURRENT_LIST_DIR}/environment.cpp ${CMAKE_CURRENT_LIST_DIR}/file_format_types.cpp ${CMAKE_CURRENT_LIST_DIR}/gamma.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_function_types.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_functions.cpp ${CMAKE_CURRENT_LIST_DIR}/matrix.cpp ${CMAKE_CURRENT_LIST_DIR}/parameter.cpp - ${CMAKE_CURRENT_LIST_DIR}/regression_model.cpp + ${CMAKE_CURRENT_LIST_DIR}/regression_report.cpp ${CMAKE_CURRENT_LIST_DIR}/shape.cpp ${CMAKE_CURRENT_LIST_DIR}/solver_types.cpp ${CMAKE_CURRENT_LIST_DIR}/svm_types.cpp @@ -264,16 +284,17 @@ add_test(NAME MainScale/executable_version COMMAND ${PLSSVM_EXECUTABLE_SCALE_NAM # add minimal run test for scaling add_test(NAME MainScale/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_SCALE_NAME} "${PLSSVM_CLASSIFICATION_TEST_FILE}" - "${CMAKE_CURRENT_BINARY_DIR}/scaled.libsvm.model" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.scaled" ) +add_test(NAME MainScale/executable_minimal_cmd_output COMMAND ${PLSSVM_EXECUTABLE_SCALE_NAME} "${PLSSVM_CLASSIFICATION_TEST_FILE}") add_test(NAME MainScale/executable_save_scaling_factors COMMAND ${PLSSVM_EXECUTABLE_SCALE_NAME} -s "${CMAKE_CURRENT_BINARY_DIR}/scaling_parameter.txt" # the file the scaling parameters are saved to "${PLSSVM_CLASSIFICATION_TEST_FILE}" # the file to scale - "${CMAKE_CURRENT_BINARY_DIR}/scaled.libsvm.model" # the scaled file (result) + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.scaled" # the 
scaled file (result) ) # add minimal run test for the classification task -add_test(NAME MainTrainClassification/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 "${PLSSVM_CLASSIFICATION_TEST_FILE}" +add_test(NAME MainTrainClassification/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -i 10 "${PLSSVM_CLASSIFICATION_TEST_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" ) add_test(NAME MainPredictClassification/executable_minimal @@ -281,8 +302,9 @@ add_test(NAME MainPredictClassification/executable_minimal "${CMAKE_CURRENT_LIST_DIR}/data/model/classification/5x4.libsvm.model" # model file "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) ) + # add minimal run test for the regression task -add_test(NAME MainTrainRegression/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 "${PLSSVM_REGRESSION_TEST_FILE}" +add_test(NAME MainTrainRegression/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -i 10 "${PLSSVM_REGRESSION_TEST_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" ) add_test(NAME MainPredictRegression/executable_minimal @@ -291,6 +313,22 @@ add_test(NAME MainPredictRegression/executable_minimal "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) ) +# if performance tracking is enabled, also add minimal run tests +if (PLSSVM_ENABLE_PERFORMANCE_TRACKING) + add_test(NAME MainTrainPerformanceTracking/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 --performance_tracking "${CMAKE_CURRENT_BINARY_DIR}/track.yaml" "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" + ) + add_test( + NAME MainPredictPerformanceTracking/executable_minimal + COMMAND + ${PLSSVM_EXECUTABLE_PREDICT_NAME} --performance_tracking "${CMAKE_CURRENT_BINARY_DIR}/track.yaml" + "${CMAKE_CURRENT_LIST_DIR}/data/libsvm/classification/5x4.libsvm" # test file + 
"${CMAKE_CURRENT_LIST_DIR}/data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) + ) +endif () + # add failing test (must return with a non-zero exit code) add_test(NAME MainTrain/executable_fail COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} " ") set_tests_properties(MainTrain/executable_fail PROPERTIES WILL_FAIL TRUE) diff --git a/tests/backends/CUDA/CMakeLists.txt b/tests/backends/CUDA/CMakeLists.txt index 78b120390..ee4dcea66 100644 --- a/tests/backends/CUDA/CMakeLists.txt +++ b/tests/backends/CUDA/CMakeLists.txt @@ -30,6 +30,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_CUDA_TEST_NAME}) +# add minimal run test for the classification task with the CUDA backend +add_test(NAME MainTrainClassificationCUDA/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b cuda "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME MainPredictClassificationCUDA/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b cuda "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the CUDA backend +add_test(NAME MainTrainRegressionCUDA/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b cuda "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionCUDA/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b cuda "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + 
"${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_CUDA_TEST_NAME}) diff --git a/tests/backends/CUDA/detail/device_ptr.cpp b/tests/backends/CUDA/detail/device_ptr.cpp index f97d0d8ab..53b746fcb 100644 --- a/tests/backends/CUDA/detail/device_ptr.cpp +++ b/tests/backends/CUDA/detail/device_ptr.cpp @@ -10,14 +10,39 @@ #include "plssvm/backends/CUDA/detail/device_ptr.cuh" // plssvm::cuda::detail::device_ptr +#include "plssvm/backends/CUDA/csvm.hpp" // plssvm::cuda::csvc +#include "plssvm/backends/CUDA/exceptions.hpp" // plssvm::cuda::backend_exception +#include "plssvm/shape.hpp" // plssvm::shape + #include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate #include "tests/naming.hpp" // naming::test_parameter_to_name -#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list, real_type_gtest} +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr #include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P #include // std::tuple +template +class CUDADevicePtrConstruct : public ::testing::Test { }; + +TYPED_TEST_SUITE(CUDADevicePtrConstruct, util::real_type_gtest, naming::test_parameter_to_name); + +TYPED_TEST(CUDADevicePtrConstruct, construct_invalid_queue) { + using real_type = util::test_parameter_type_at_t<0, TypeParam>; + + // the number of devices + const std::size_t num_devices = plssvm::cuda::csvc{}.num_available_devices(); + + EXPECT_THROW_WHAT_MATCHER((plssvm::cuda::detail::device_ptr(plssvm::shape{ 4, 4 }, plssvm::shape{ 4, 4 }, -1)), + plssvm::cuda::backend_exception, + ::testing::HasSubstr(fmt::format("Illegal 
device ID! Must be in range: [0, {}) but is -1.", num_devices))); + + EXPECT_THROW_WHAT_MATCHER((plssvm::cuda::detail::device_ptr(plssvm::shape{ 4, 4 }, plssvm::shape{ 4, 4 }, num_devices)), + plssvm::cuda::backend_exception, + ::testing::HasSubstr(fmt::format("Illegal device ID! Must be in range: [0, {}) but is {}.", num_devices, num_devices))); +} + template struct cuda_device_ptr_test_type { using device_ptr_type = plssvm::cuda::detail::device_ptr; diff --git a/tests/backends/CUDA/mock_cuda_csvm.hpp b/tests/backends/CUDA/mock_cuda_csvm.hpp index a54f7196e..5a0c4187f 100644 --- a/tests/backends/CUDA/mock_cuda_csvm.hpp +++ b/tests/backends/CUDA/mock_cuda_csvm.hpp @@ -15,6 +15,7 @@ #include "plssvm/backends/CUDA/csvm.hpp" // plssvm::cuda::csvm #include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "gmock/gmock.h" // MOCK_METHOD, ON_CALL, ::testing::Return @@ -35,7 +36,7 @@ class mock_cuda_csvm final : public plssvm::cuda::csvm { template explicit mock_cuda_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... }, + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... 
}, base_type{} { this->fake_functions(); } diff --git a/tests/backends/HIP/CMakeLists.txt b/tests/backends/HIP/CMakeLists.txt index e70d488af..2d86c5990 100644 --- a/tests/backends/HIP/CMakeLists.txt +++ b/tests/backends/HIP/CMakeLists.txt @@ -58,6 +58,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_HIP_TEST_NAME}) +# add minimal run test for the classification task with the HIP backend +add_test(NAME MainTrainClassificationHIP/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b hip "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME MainPredictClassificationHIP/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b hip "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the HIP backend +add_test(NAME MainTrainRegressionHIP/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b hip "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionHIP/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b hip "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_HIP_TEST_NAME}) diff --git a/tests/backends/HIP/detail/device_ptr.hip b/tests/backends/HIP/detail/device_ptr.hip index ecf8ce92a..b72d3f83c 100644 --- 
a/tests/backends/HIP/detail/device_ptr.hip +++ b/tests/backends/HIP/detail/device_ptr.hip @@ -8,16 +8,40 @@ * @brief Tests for the HIP backend device pointer. */ +#include "plssvm/backends/HIP/csvm.hpp" // plssvm::hip::csvc #include "plssvm/backends/HIP/detail/device_ptr.hip.hpp" // plssvm::hip::detail::device_ptr +#include "plssvm/backends/HIP/exceptions.hpp" // plssvm::hip::backend_exception +#include "plssvm/shape.hpp" // plssvm::shape #include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate #include "tests/naming.hpp" // naming::test_parameter_to_name -#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list, real_type_gtest} +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr #include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P #include // std::tuple +template +class HIPDevicePtrConstruct : public ::testing::Test { }; + +TYPED_TEST_SUITE(HIPDevicePtrConstruct, util::real_type_gtest, naming::test_parameter_to_name); + +TYPED_TEST(HIPDevicePtrConstruct, construct_invalid_queue) { + using real_type = util::test_parameter_type_at_t<0, TypeParam>; + + // the number of devices + const std::size_t num_devices = plssvm::hip::csvc{}.num_available_devices(); + + EXPECT_THROW_WHAT_MATCHER((plssvm::hip::detail::device_ptr(plssvm::shape{ 4, 4 }, plssvm::shape{ 4, 4 }, -1)), + plssvm::hip::backend_exception, + ::testing::HasSubstr(fmt::format("Illegal device ID! Must be in range: [0, {}) but is -1.", num_devices))); + + EXPECT_THROW_WHAT_MATCHER((plssvm::hip::detail::device_ptr(plssvm::shape{ 4, 4 }, plssvm::shape{ 4, 4 }, num_devices)), + plssvm::hip::backend_exception, + ::testing::HasSubstr(fmt::format("Illegal device ID! 
Must be in range: [0, {}) but is {}.", num_devices, num_devices))); +} + template struct hip_device_ptr_test_type { using device_ptr_type = plssvm::hip::detail::device_ptr; diff --git a/tests/backends/HIP/mock_hip_csvm.hpp b/tests/backends/HIP/mock_hip_csvm.hpp index 54611862a..0f19c730a 100644 --- a/tests/backends/HIP/mock_hip_csvm.hpp +++ b/tests/backends/HIP/mock_hip_csvm.hpp @@ -15,6 +15,7 @@ #include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/HIP/csvm.hpp" // plssvm::hip::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "gmock/gmock.h" // MOCK_METHOD, ON_CALL, ::testing::Return @@ -35,7 +36,7 @@ class mock_hip_csvm final : public plssvm::hip::csvm { template explicit mock_hip_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... }, + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... }, base_type{} { this->fake_functions(); } diff --git a/tests/backends/HPX/CMakeLists.txt b/tests/backends/HPX/CMakeLists.txt index 171c5ff23..0b9cc116e 100644 --- a/tests/backends/HPX/CMakeLists.txt +++ b/tests/backends/HPX/CMakeLists.txt @@ -23,6 +23,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_HPX_TEST_NAME}) +# add minimal run test for the classification task with the HPX backend +add_test(NAME MainTrainClassificationHPX/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b hpx "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME MainPredictClassificationHPX/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b hpx "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + 
"${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the HPX backend +add_test(NAME MainTrainRegressionHPX/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b hpx "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionHPX/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b hpx "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_HPX_TEST_NAME}) diff --git a/tests/backends/HPX/hpx_csvm.cpp b/tests/backends/HPX/hpx_csvm.cpp index 6edcc2c55..0d5d9003e 100644 --- a/tests/backends/HPX/hpx_csvm.cpp +++ b/tests/backends/HPX/hpx_csvm.cpp @@ -12,7 +12,7 @@ #include "plssvm/backend_types.hpp" // plssvm::csvm_to_backend_type_v #include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::{csvm, csvc, csvr} #include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception -#include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp" // plssvm::hpx::device_kernel_symm +#include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp" // plssvm::hpx::{device_kernel_symm, device_ce_kernel_symm_mirror} #include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::hpx::device_kernel_assembly #include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::hpx::device_kernel_assembly_symm #include "plssvm/backends/HPX/kernel/predict_kernel.hpp" // plssvm::hpx::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} @@ -212,6 +212,7 @@ using plssvm::hpx::detail::device_kernel_assembly_symm; using 
plssvm::hpx::detail::device_kernel_predict; using plssvm::hpx::detail::device_kernel_predict_linear; using plssvm::hpx::detail::device_kernel_symm; +using plssvm::hpx::detail::device_kernel_symm_mirror; using plssvm::hpx::detail::device_kernel_w_linear; #include "tests/backends/generic_csvm_tests.hpp" // generic backend C-SVM tests to instantiate diff --git a/tests/backends/HPX/mock_hpx_csvm.hpp b/tests/backends/HPX/mock_hpx_csvm.hpp index fe4247d1c..697eac2b5 100644 --- a/tests/backends/HPX/mock_hpx_csvm.hpp +++ b/tests/backends/HPX/mock_hpx_csvm.hpp @@ -15,6 +15,7 @@ #pragma once #include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm /** @@ -26,7 +27,7 @@ class mock_hpx_csvm final : public plssvm::hpx::csvm { public: template explicit mock_hpx_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... }, + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... 
}, base_type{} { } // make protected member functions public diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index b6a5a1c0c..d1f12507c 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -18,7 +18,6 @@ set(PLSSVM_KOKKOS_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp - ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp ${CMAKE_CURRENT_LIST_DIR}/execution_space_type_traits.cpp ) @@ -27,13 +26,13 @@ find_package(Kokkos REQUIRED) # add test executable add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.cpp ${PLSSVM_KOKKOS_TEST_SOURCES}) -if (Kokkos_ENABLE_CUDA) - # fix template limit when using Kokkos::Cuda - target_compile_options(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE -Xcudafe --pending_instantiations=0) - +if (Kokkos_ENABLE_CUDA AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # tests won't compile with nvcc if (NOT PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES) - message(FATAL_ERROR "Due to template instantiation limits within nvcc, only reduced label type tests are currently supported!") + message( + FATAL_ERROR + "Due to template instantiation limits within nvcc, only reduced label type tests are currently supported! Alternatively, compile Kokkos CUDA with clang. 
" + ) endif () endif () @@ -48,6 +47,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_KOKKOS_TEST_NAME}) +# add minimal run test for the classification task with the Kokkos backend +add_test(NAME MainTrainClassificationKokkos/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b kokkos "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME MainPredictClassificationKokkos/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b kokkos "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the Kokkos backend +add_test(NAME MainTrainRegressionKokkos/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b kokkos "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionKokkos/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b kokkos "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_KOKKOS_TEST_NAME}) diff --git a/tests/backends/Kokkos/detail/device_wrapper.cpp b/tests/backends/Kokkos/detail/device_wrapper.cpp index ca644ece7..64af4fe4d 100644 --- a/tests/backends/Kokkos/detail/device_wrapper.cpp +++ b/tests/backends/Kokkos/detail/device_wrapper.cpp @@ -13,6 +13,7 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" // 
plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} #include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace @@ -95,7 +96,7 @@ struct device_list_test { break; } } - const std::vector devices = plssvm::kokkos::detail::get_device_list(space, default_target); + const std::vector devices = plssvm::kokkos::detail::get_device_list(space, default_target, plssvm::mpi::communicator{}); // check the number of returned devices if (space == plssvm::kokkos::execution_space::cuda || space == plssvm::kokkos::execution_space::hip || space == plssvm::kokkos::execution_space::sycl) { diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp index 90cfb0ff6..9e9b4d83f 100644 --- a/tests/backends/Kokkos/kokkos_csvm.cpp +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -210,7 +210,13 @@ TYPED_TEST(KokkosCSVMConstructor, construct_execution_space_and_parameter) { // #if defined(KOKKOS_ENABLE_SYCL) // explicitly providing the SYCL execution space should work - EXPECT_NO_THROW((csvm_type{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); + if (target_is_available(plssvm::target_platform::gpu_nvidia) || target_is_available(plssvm::target_platform::gpu_amd) || target_is_available(plssvm::target_platform::gpu_intel)) { + EXPECT_NO_THROW((csvm_type{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); + } else { + EXPECT_THROW_WHAT((csvm_type{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace SYCL!"); + } #else 
EXPECT_THROW_WHAT((csvm_type{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), plssvm::kokkos::backend_exception, @@ -549,7 +555,13 @@ TYPED_TEST(KokkosCSVMConstructor, construct_execution_space_and_named_args) { / #if defined(KOKKOS_ENABLE_SYCL) // explicitly providing the SYCL execution space should work - EXPECT_NO_THROW((csvm_type{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); + if (target_is_available(plssvm::target_platform::gpu_nvidia) || target_is_available(plssvm::target_platform::gpu_amd) || target_is_available(plssvm::target_platform::gpu_intel)) { + EXPECT_NO_THROW((csvm_type{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); + } else { + EXPECT_THROW_WHAT((csvm_type{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace SYCL!"); + } #else EXPECT_THROW_WHAT((csvm_type{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), plssvm::kokkos::backend_exception, @@ -801,10 +813,7 @@ INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokk // generic C-SVC tests INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVC, GenericCSVC, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVC, GenericCSVCKernelFunctionClassification, kokkos_classification_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); -#if !defined(KOKKOS_ENABLE_CUDA) -// testcase doesn't compile with Kokkos::Cuda's nvcc due to template instantiation 
limits INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVC, GenericCSVCSolverKernelFunctionClassification, kokkos_classification_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); -#endif // generic C-SVR tests INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVR, GenericCSVR, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVR, GenericCSVRKernelFunction, kokkos_regression_label_type_and_kernel_function_type_gtest, naming::test_parameter_to_name); diff --git a/tests/backends/Kokkos/mock_kokkos_csvm.hpp b/tests/backends/Kokkos/mock_kokkos_csvm.hpp index 7400f8472..e39bb6cc8 100644 --- a/tests/backends/Kokkos/mock_kokkos_csvm.hpp +++ b/tests/backends/Kokkos/mock_kokkos_csvm.hpp @@ -15,6 +15,7 @@ #include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -36,7 +37,7 @@ class mock_kokkos_csvm final : public plssvm::kokkos::csvm { template explicit mock_kokkos_csvm(Args &&...args) : - plssvm::csvm{ args... }, + plssvm::csvm{ plssvm::mpi::communicator{}, args... }, base_type(plssvm::target_platform::automatic, std::forward(args)...) 
{ this->fake_functions(); } diff --git a/tests/backends/Kokkos/utility.hpp b/tests/backends/Kokkos/utility.hpp index 3c3458198..98fa68aba 100644 --- a/tests/backends/Kokkos/utility.hpp +++ b/tests/backends/Kokkos/utility.hpp @@ -27,7 +27,7 @@ namespace util { #if defined(KOKKOS_ENABLE_HIP) && (defined(PLSSVM_HAS_NVIDIA_TARGET) || defined(PLSSVM_HAS_AMD_TARGET)) // for Kokkos::HIP, an NVIDIA or AMD target must be available plssvm::kokkos::execution_space::hip, #endif -#if defined(KOKKOS_ENABLE_SYCL) // for Kokkos::SYCL, any target is ok +#if defined(KOKKOS_ENABLE_SYCL) && (defined(PLSSVM_HAS_NVIDIA_TARGET) || defined(PLSSVM_HAS_AMD_TARGET) || defined(PLSSVM_HAS_INTEL_TARGET)) // for Kokkos::SYCL, any target is ok except CPUs plssvm::kokkos::execution_space::sycl, #endif #if defined(KOKKOS_ENABLE_HPX) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Experimental::HPX, a CPU target must be available diff --git a/tests/backends/OpenCL/CMakeLists.txt b/tests/backends/OpenCL/CMakeLists.txt index d656b6237..5f4e8b006 100644 --- a/tests/backends/OpenCL/CMakeLists.txt +++ b/tests/backends/OpenCL/CMakeLists.txt @@ -11,6 +11,7 @@ set(PLSSVM_OPENCL_TEST_NAME OpenCL_tests) set(PLSSVM_OPENCL_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/error_code.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/jit_info.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp @@ -28,6 +29,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_OPENCL_TEST_NAME}) +# add minimal run test for the classification task with the OpenCL backend +add_test(NAME MainTrainClassificationOpenCL/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b opencl "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME 
MainPredictClassificationOpenCL/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b opencl "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the OpenCL backend +add_test(NAME MainTrainRegressionOpenCL/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b opencl "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionOpenCL/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b opencl "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_OPENCL_TEST_NAME}) diff --git a/tests/backends/OpenCL/detail/jit_info.cpp b/tests/backends/OpenCL/detail/jit_info.cpp new file mode 100644 index 000000000..675146acf --- /dev/null +++ b/tests/backends/OpenCL/detail/jit_info.cpp @@ -0,0 +1,73 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the JIT info struct necessary for the OpenCL backend. 
+ */ + +#include "plssvm/backends/OpenCL/detail/jit_info.hpp" // plssvm::opencl::detail::jit_info + +#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING + +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr +#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE + +#include // std::chrono literals +#include // std::string + +// check whether the plssvm::opencl::detail::jit_info::caching_status -> std::string conversions are correct +TEST(OpenCLJITInfoCachingStatus, to_string) { + // check conversions to std::string + EXPECT_CONVERSION_TO_STRING(plssvm::opencl::detail::jit_info::caching_status::success, "success"); + EXPECT_CONVERSION_TO_STRING(plssvm::opencl::detail::jit_info::caching_status::error_no_cached_files, "no cached files exist (checksum missmatch)"); + EXPECT_CONVERSION_TO_STRING(plssvm::opencl::detail::jit_info::caching_status::error_invalid_number_of_cached_files, "invalid number of cached files"); +} + +TEST(OpenCLJITInfoCachingStatus, to_string_unknown) { + // check conversions to std::string from unknown caching_status + EXPECT_CONVERSION_TO_STRING(static_cast(3), "unknown"); +} + +TEST(OpenCLJITInfo, default_construct) { + // default construct a JIT info struct + const plssvm::opencl::detail::jit_info info{}; + + EXPECT_FALSE(info.use_ptx_inline); + EXPECT_EQ(info.cache_state, plssvm::opencl::detail::jit_info::caching_status::success); + EXPECT_EQ(info.cache_dir, std::string{}); + EXPECT_EQ(info.duration, std::chrono::milliseconds{}); +} + +TEST(OpenCLJITInfo, construct) { + using namespace std::chrono_literals; + + // construct a JIT info struct + const plssvm::opencl::detail::jit_info info{ + true, + plssvm::opencl::detail::jit_info::caching_status::error_no_cached_files, + "jit/file/path", + 250ms + }; + + EXPECT_TRUE(info.use_ptx_inline); + EXPECT_EQ(info.cache_state, plssvm::opencl::detail::jit_info::caching_status::error_no_cached_files); + EXPECT_EQ(info.cache_dir, std::string{ "jit/file/path" 
}); + EXPECT_EQ(info.duration, 250ms); +} + +TEST(OpenCLJITInfo, create_jit_report) { + using namespace std::chrono_literals; + + // construct a JIT info struct + const plssvm::opencl::detail::jit_info info{ + true, + plssvm::opencl::detail::jit_info::caching_status::error_invalid_number_of_cached_files, + "jit/file/path", + 250ms + }; + + EXPECT_THAT(plssvm::opencl::detail::create_jit_report(info), ::testing::HasSubstr("250ms; PTX inline; cache: invalid number of cached files (jit/file/path)")); +} diff --git a/tests/backends/OpenCL/mock_opencl_csvm.hpp b/tests/backends/OpenCL/mock_opencl_csvm.hpp index 2a533db9d..f0ae913d6 100644 --- a/tests/backends/OpenCL/mock_opencl_csvm.hpp +++ b/tests/backends/OpenCL/mock_opencl_csvm.hpp @@ -15,6 +15,7 @@ #include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/OpenCL/csvm.hpp" // plssvm::opencl::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "gmock/gmock.h" // MOCK_METHOD, ON_CALL, ::testing::Return @@ -35,7 +36,7 @@ class mock_opencl_csvm : public plssvm::opencl::csvm { template explicit mock_opencl_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... }, + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... 
}, base_type{} { this->fake_functions(); } diff --git a/tests/backends/OpenMP/CMakeLists.txt b/tests/backends/OpenMP/CMakeLists.txt index 12f09273b..bfc76436d 100644 --- a/tests/backends/OpenMP/CMakeLists.txt +++ b/tests/backends/OpenMP/CMakeLists.txt @@ -21,6 +21,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_OPENMP_TEST_NAME}) +# add minimal run test for the classification task with the OpenMP backend +add_test(NAME MainTrainClassificationOpenMP/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b openmp "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME MainPredictClassificationOpenMP/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b openmp "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the OpenMP backend +add_test(NAME MainTrainRegressionOpenMP/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b openmp "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionOpenMP/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b openmp "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_OPENMP_TEST_NAME}) diff --git a/tests/backends/OpenMP/mock_openmp_csvm.hpp b/tests/backends/OpenMP/mock_openmp_csvm.hpp 
index ea1d3894d..c09b3b765 100644 --- a/tests/backends/OpenMP/mock_openmp_csvm.hpp +++ b/tests/backends/OpenMP/mock_openmp_csvm.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/backends/OpenMP/csvm.hpp" // plssvm::openmp::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm /** @@ -25,7 +26,7 @@ class mock_openmp_csvm final : public plssvm::openmp::csvm { public: template explicit mock_openmp_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... }, + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... }, base_type{} { } // make protected member functions public diff --git a/tests/backends/OpenMP/openmp_csvm.cpp b/tests/backends/OpenMP/openmp_csvm.cpp index 52cb36939..f4d87e7b6 100644 --- a/tests/backends/OpenMP/openmp_csvm.cpp +++ b/tests/backends/OpenMP/openmp_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backend_types.hpp" // plssvm::csvm_to_backend_type_v #include "plssvm/backends/OpenMP/csvm.hpp" // plssvm::openmp::{csvm, csvc, csvr} #include "plssvm/backends/OpenMP/exceptions.hpp" // plssvm::openmp::backend_exception -#include "plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp" // plssvm::openmp::device_kernel_symm +#include "plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp" // plssvm::openmp::{device_kernel_symm, device_ce_kernel_symm_mirror} #include "plssvm/backends/OpenMP/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::openmp::device_kernel_assembly #include "plssvm/backends/OpenMP/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::openmp::device_kernel_assembly_symm #include "plssvm/backends/OpenMP/kernel/predict_kernel.hpp" // plssvm::openmp::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} @@ -214,6 +214,7 @@ using plssvm::openmp::detail::device_kernel_assembly_symm; using plssvm::openmp::detail::device_kernel_predict; using plssvm::openmp::detail::device_kernel_predict_linear; using 
plssvm::openmp::detail::device_kernel_symm; +using plssvm::openmp::detail::device_kernel_symm_mirror; using plssvm::openmp::detail::device_kernel_w_linear; #include "tests/backends/generic_csvm_tests.hpp" // generic backend C-SVM tests to instantiate diff --git a/tests/backends/SYCL/AdaptiveCpp/CMakeLists.txt b/tests/backends/SYCL/AdaptiveCpp/CMakeLists.txt index 3f32feb92..31967bce4 100644 --- a/tests/backends/SYCL/AdaptiveCpp/CMakeLists.txt +++ b/tests/backends/SYCL/AdaptiveCpp/CMakeLists.txt @@ -37,6 +37,34 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_SYCL_ADAPTIVECPP_TEST_NAME}) +# add minimal run test for the classification task with the AdaptiveCpp SYCL backend +add_test(NAME MainTrainClassificationSYCLAdaptiveCpp/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b sycl --sycl_implementation_type acpp "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test( + NAME MainPredictClassificationSYCLAdaptiveCpp/executable_minimal + COMMAND + ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b sycl --sycl_implementation_type acpp + "${CMAKE_CURRENT_LIST_DIR}/../../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the AdaptiveCpp SYCL backend +add_test(NAME MainTrainRegressionSYCLAdaptiveCpp/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b sycl --sycl_implementation_type acpp "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test( + NAME MainPredictRegressionSYCLAdaptiveCpp/executable_minimal + COMMAND + ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b sycl --sycl_implementation_type acpp + 
"${CMAKE_CURRENT_LIST_DIR}/../../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_SYCL_ADAPTIVECPP_TEST_NAME}) diff --git a/tests/backends/SYCL/AdaptiveCpp/mock_adaptivecpp_csvm.hpp b/tests/backends/SYCL/AdaptiveCpp/mock_adaptivecpp_csvm.hpp index 68ea14538..18f31cb34 100644 --- a/tests/backends/SYCL/AdaptiveCpp/mock_adaptivecpp_csvm.hpp +++ b/tests/backends/SYCL/AdaptiveCpp/mock_adaptivecpp_csvm.hpp @@ -15,6 +15,7 @@ #include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -36,7 +37,7 @@ class mock_adaptivecpp_csvm final : public plssvm::adaptivecpp::csvm { template explicit mock_adaptivecpp_csvm(Args &&...args) : - plssvm::csvm{ args... }, + plssvm::csvm{ plssvm::mpi::communicator{}, args... }, base_type(plssvm::target_platform::automatic, std::forward(args)...) 
{ this->fake_functions(); } diff --git a/tests/backends/SYCL/DPCPP/CMakeLists.txt b/tests/backends/SYCL/DPCPP/CMakeLists.txt index bdfc317ad..e36545f08 100644 --- a/tests/backends/SYCL/DPCPP/CMakeLists.txt +++ b/tests/backends/SYCL/DPCPP/CMakeLists.txt @@ -37,6 +37,34 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_SYCL_DPCPP_TEST_NAME}) +# add minimal run test for the classification task with the DPC++ SYCL backend +add_test(NAME MainTrainClassificationSYCLDPCPP/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b sycl --sycl_implementation_type dpcpp "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test( + NAME MainPredictClassificationSYCLDPCPP/executable_minimal + COMMAND + ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b sycl --sycl_implementation_type dpcpp + "${CMAKE_CURRENT_LIST_DIR}/../../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the DPC++ SYCL backend +add_test(NAME MainTrainRegressionSYCLDPCPP/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b sycl --sycl_implementation_type dpcpp "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test( + NAME MainPredictRegressionSYCLDPCPP/executable_minimal + COMMAND + ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b sycl --sycl_implementation_type dpcpp + "${CMAKE_CURRENT_LIST_DIR}/../../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage 
dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_SYCL_DPCPP_TEST_NAME}) diff --git a/tests/backends/SYCL/DPCPP/mock_dpcpp_csvm.hpp b/tests/backends/SYCL/DPCPP/mock_dpcpp_csvm.hpp index e16ac8254..49e45936a 100644 --- a/tests/backends/SYCL/DPCPP/mock_dpcpp_csvm.hpp +++ b/tests/backends/SYCL/DPCPP/mock_dpcpp_csvm.hpp @@ -15,7 +15,9 @@ #include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/SYCL/DPCPP/csvm.hpp" // plssvm::dpcpp::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "gmock/gmock.h" // MOCK_METHOD, ON_CALL, ::testing::Return @@ -35,7 +37,7 @@ class mock_dpcpp_csvm final : public plssvm::dpcpp::csvm { template explicit mock_dpcpp_csvm(Args &&...args) : - plssvm::csvm{ args... }, + plssvm::csvm{ plssvm::mpi::communicator{}, args... }, base_type(plssvm::target_platform::automatic, std::forward(args)...) 
{ this->fake_functions(); } diff --git a/tests/backends/execution_range.cpp b/tests/backends/execution_range.cpp index 75fe16ef2..371c5fa94 100644 --- a/tests/backends/execution_range.cpp +++ b/tests/backends/execution_range.cpp @@ -96,48 +96,48 @@ TEST(DimType, swap_free_function) { TEST(DimType, equality) { // create dim types - constexpr plssvm::detail::dim_type dim1{}; - constexpr plssvm::detail::dim_type dim2{ 64ull }; - constexpr plssvm::detail::dim_type dim3{ 64ull, 32ull }; - constexpr plssvm::detail::dim_type dim4{ 64ull, 32ull, 16ull }; - constexpr plssvm::detail::dim_type dim5{ 32ull }; - constexpr plssvm::detail::dim_type dim6{ 32ull, 16ull }; - constexpr plssvm::detail::dim_type dim7{ 32ull, 16ull, 8ull }; + constexpr plssvm::detail::dim_type dim_1{}; + constexpr plssvm::detail::dim_type dim_2{ 64ull }; + constexpr plssvm::detail::dim_type dim_3{ 64ull, 32ull }; + constexpr plssvm::detail::dim_type dim_4{ 64ull, 32ull, 16ull }; + constexpr plssvm::detail::dim_type dim_5{ 32ull }; + constexpr plssvm::detail::dim_type dim_6{ 32ull, 16ull }; + constexpr plssvm::detail::dim_type dim_7{ 32ull, 16ull, 8ull }; // check for equality - EXPECT_TRUE(dim1 == dim1); - EXPECT_TRUE(dim2 == dim2); - EXPECT_TRUE(dim3 == dim3); - EXPECT_TRUE(dim4 == dim4); - EXPECT_FALSE(dim2 == dim3); - EXPECT_FALSE(dim2 == dim4); - EXPECT_FALSE(dim3 == dim4); - EXPECT_FALSE(dim2 == dim5); - EXPECT_FALSE(dim3 == dim6); - EXPECT_FALSE(dim4 == dim7); + EXPECT_TRUE(dim_1 == dim_1); + EXPECT_TRUE(dim_2 == dim_2); + EXPECT_TRUE(dim_3 == dim_3); + EXPECT_TRUE(dim_4 == dim_4); + EXPECT_FALSE(dim_2 == dim_3); + EXPECT_FALSE(dim_2 == dim_4); + EXPECT_FALSE(dim_3 == dim_4); + EXPECT_FALSE(dim_2 == dim_5); + EXPECT_FALSE(dim_3 == dim_6); + EXPECT_FALSE(dim_4 == dim_7); } TEST(DimType, inequality) { // create dim types - constexpr plssvm::detail::dim_type dim1{}; - constexpr plssvm::detail::dim_type dim2{ 64ull }; - constexpr plssvm::detail::dim_type dim3{ 64ull, 32ull }; - constexpr 
plssvm::detail::dim_type dim4{ 64ull, 32ull, 16ull }; - constexpr plssvm::detail::dim_type dim5{ 32ull }; - constexpr plssvm::detail::dim_type dim6{ 32ull, 16ull }; - constexpr plssvm::detail::dim_type dim7{ 32ull, 16ull, 8ull }; + constexpr plssvm::detail::dim_type dim_1{}; + constexpr plssvm::detail::dim_type dim_2{ 64ull }; + constexpr plssvm::detail::dim_type dim_3{ 64ull, 32ull }; + constexpr plssvm::detail::dim_type dim_4{ 64ull, 32ull, 16ull }; + constexpr plssvm::detail::dim_type dim_5{ 32ull }; + constexpr plssvm::detail::dim_type dim_6{ 32ull, 16ull }; + constexpr plssvm::detail::dim_type dim_7{ 32ull, 16ull, 8ull }; // check for inequality - EXPECT_FALSE(dim1 != dim1); - EXPECT_FALSE(dim2 != dim2); - EXPECT_FALSE(dim3 != dim3); - EXPECT_FALSE(dim4 != dim4); - EXPECT_TRUE(dim2 != dim3); - EXPECT_TRUE(dim2 != dim4); - EXPECT_TRUE(dim3 != dim4); - EXPECT_TRUE(dim2 != dim5); - EXPECT_TRUE(dim3 != dim6); - EXPECT_TRUE(dim4 != dim7); + EXPECT_FALSE(dim_1 != dim_1); + EXPECT_FALSE(dim_2 != dim_2); + EXPECT_FALSE(dim_3 != dim_3); + EXPECT_FALSE(dim_4 != dim_4); + EXPECT_TRUE(dim_2 != dim_3); + EXPECT_TRUE(dim_2 != dim_4); + EXPECT_TRUE(dim_3 != dim_4); + EXPECT_TRUE(dim_2 != dim_5); + EXPECT_TRUE(dim_3 != dim_6); + EXPECT_TRUE(dim_4 != dim_7); } TEST(DimType, to_string) { diff --git a/tests/backends/generic_base_csvc_tests.hpp b/tests/backends/generic_base_csvc_tests.hpp index 0ccd73d87..b110978a0 100644 --- a/tests/backends/generic_base_csvc_tests.hpp +++ b/tests/backends/generic_base_csvc_tests.hpp @@ -126,6 +126,12 @@ TYPED_TEST_P(GenericCSVCKernelFunctionClassification, predict) { // check the calculated result for correctness EXPECT_EQ(calculated, test_data.labels().value().get()); + + // for the linear kernel, predict again to check whether reusing the w vector works as intended + if (kernel == plssvm::kernel_function_type::linear) { + const std::vector calculated_second = svc.predict(model, test_data); + EXPECT_EQ(calculated_second, 
test_data.labels().value().get()); + } } TYPED_TEST_P(GenericCSVCKernelFunctionClassification, score_model) { diff --git a/tests/backends/generic_base_csvm_tests.hpp b/tests/backends/generic_base_csvm_tests.hpp index c3f83ae18..fd93963c1 100644 --- a/tests/backends/generic_base_csvm_tests.hpp +++ b/tests/backends/generic_base_csvm_tests.hpp @@ -23,6 +23,7 @@ #include "plssvm/detail/utility.hpp" // plssvm::detail::{unreachable, get} #include "plssvm/kernel_function_types.hpp" // plssvm::csvm_to_backend_type_v, plssvm::backend_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/solver_types.hpp" // plssvm::solver_type @@ -292,7 +293,7 @@ TYPED_TEST_P(GenericCSVM, blas_level_3_explicit_without_C) { { plssvm::real_type{ 0.3 }, plssvm::real_type{ 1.3 }, plssvm::real_type{ 2.3 } } } }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_explicit_matrices(matr_A, svm) }; const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 2.0 }, plssvm::real_type{ 3.0 } }, @@ -338,7 +339,7 @@ TYPED_TEST_P(GenericCSVM, blas_level_3_explicit) { { plssvm::real_type{ 0.3 }, plssvm::real_type{ 1.3 }, plssvm::real_type{ 2.3 } } } }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_explicit_matrices(matr_A, svm) }; const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 2.0 }, 
plssvm::real_type{ 3.0 } }, @@ -384,7 +385,7 @@ TYPED_TEST_P(GenericCSVM, conjugate_gradients_trivial) { { plssvm::real_type{ 0.0 }, plssvm::real_type{ 0.0 }, plssvm::real_type{ 0.0 }, plssvm::real_type{ 1.0 } } } }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_explicit_matrices(matr_A, svm) }; const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 }, plssvm::real_type{ -1.0 } }, @@ -416,7 +417,7 @@ TYPED_TEST_P(GenericCSVM, conjugate_gradients) { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 3.0 } } } }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A = util::init_explicit_matrices(matr_A, svm); const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 2.0 } }, @@ -483,7 +484,7 @@ TYPED_TEST_P(GenericCSVMKernelFunction, blas_level_3_assembly_implicit_without_C const auto [q, QA_cost] = ground_truth::perform_dimensional_reduction(params, matr_A); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows() - 1, svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows() - 1, svm.num_available_devices()); const std::vector A{ util::init_implicit_matrices(matr_A, svm, params, q, QA_cost) }; const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 2.0 }, plssvm::real_type{ 3.0 } }, @@ -540,7 +541,7 @@ TYPED_TEST_P(GenericCSVMKernelFunction, blas_level_3_assembly_implicit) { const auto [q, 
QA_cost] = ground_truth::perform_dimensional_reduction(params, matr_A); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows() - 1, svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows() - 1, svm.num_available_devices()); const std::vector A{ util::init_implicit_matrices(matr_A, svm, params, q, QA_cost) }; const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 2.0 }, plssvm::real_type{ 3.0 } }, @@ -602,7 +603,7 @@ TYPED_TEST_P(GenericCSVMKernelFunction, predict_values) { const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), 1); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), 1); // predict the values using the previously learned support vectors and weights const plssvm::aos_matrix calculated = svm.predict_values(params, support_vectors, weights, rho, w, data); @@ -667,7 +668,7 @@ TYPED_TEST_P(GenericCSVMKernelFunction, predict_values_provided_w) { const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), 1); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), 1); // predict the values using the previously learned support vectors and weights const plssvm::aos_matrix calculated = svm.predict_values(params, support_vectors, weights, rho, w, data); @@ -849,7 +850,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix_minimal) { const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data 
distribution - svm.data_distribution_ = std::make_unique(data.num_rows() - 1, num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows() - 1, num_devices); // automatic solver type not permitted if constexpr (solver == plssvm::solver_type::automatic) { @@ -959,7 +960,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix) { const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows() - 1, num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows() - 1, num_devices); // automatic solver type not permitted if constexpr (solver == plssvm::solver_type::automatic) { @@ -1075,7 +1076,7 @@ TYPED_TEST_P(GenericCSVMDeathTest, blas_level_3_automatic) { { plssvm::real_type{ 0.3 }, plssvm::real_type{ 1.3 }, plssvm::real_type{ 2.3 } } } }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_explicit_matrices(matr_A, svm) }; const plssvm::soa_matrix B{ { { plssvm::real_type{ 1.0 }, plssvm::real_type{ 2.0 }, plssvm::real_type{ 3.0 } }, @@ -1115,7 +1116,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, conjugate_gradients_empty_B) { const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; // create empty 
matrix @@ -1141,7 +1142,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, conjugate_gradients_invalid_eps) { const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; const plssvm::soa_matrix B{ plssvm::shape{ 1, 6 } }; @@ -1167,7 +1168,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, conjugate_gradients_invalid_max_cg_iter const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; const plssvm::soa_matrix B{ plssvm::shape{ 1, 6 } }; @@ -1195,7 +1196,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, run_blas_level_3_wrong_number_of_kernel const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; A.pop_back(); @@ -1227,7 +1228,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, blas_level_3_empty_matrices) { const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); 
const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; plssvm::soa_matrix matr{ plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; @@ -1258,7 +1259,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, blas_level_3_missing_padding) { const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; plssvm::soa_matrix matr_padded{ plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; @@ -1289,7 +1290,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, blas_level_3_matrix_shape_mismatch) { const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; plssvm::soa_matrix B{ plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; @@ -1320,7 +1321,7 @@ TYPED_TEST_P(GenericCSVMSolverDeathTest, blas_level_3_matrix_padding_mismatch) { const plssvm::real_type QA_cost{ 1.0 }; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(matr_A.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, matr_A.num_rows(), svm.num_available_devices()); const std::vector A{ util::init_matrices(matr_A, solver, svm, params, q_red, QA_cost) }; plssvm::soa_matrix B{ plssvm::shape{ 4, 4 }, plssvm::shape{ 3, 3 } }; @@ -1492,7 +1493,7 @@ 
TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, assemble_kernel_matrix_automati const plssvm::real_type QA_cost = 42.0; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(A.num_rows() - 1, svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, A.num_rows() - 1, svm.num_available_devices()); // the solver type must not be automatic EXPECT_DEATH(std::ignore = svm.assemble_kernel_matrix(plssvm::solver_type::automatic, params, A, q_red, QA_cost), ::testing::HasSubstr("An explicit solver type must be provided instead of solver_type::automatic!")); @@ -1524,7 +1525,7 @@ TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, predict_values_empty_matrices) const auto data = util::generate_random_matrix>(plssvm::shape{ 2, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), svm.num_available_devices()); // support vectors shouldn't be empty EXPECT_DEATH(std::ignore = svm.predict_values(params, empty_soa_matr, weights, rho, w, data), "The support vectors must not be empty!"); @@ -1564,7 +1565,7 @@ TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, predict_values_missing_padding) const auto data_without_padding = util::generate_random_matrix>(plssvm::shape{ 2, 4 }); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), svm.num_available_devices()); // support vectors must be padded EXPECT_DEATH(std::ignore = svm.predict_values(params, support_vectors_without_padding, weights, rho, w, data), "The support vectors must be padded!"); @@ -1598,7 +1599,7 @@ 
TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, predict_values_sv_alpha_size_mi const auto data = util::generate_random_matrix>(plssvm::shape{ 2, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), svm.num_available_devices()); // the number of support vectors and weights must be identical EXPECT_DEATH(std::ignore = svm.predict_values(params, support_vectors, weights, rho, w, data), ::testing::HasSubstr("The number of support vectors (3) and number of weights (4) must be the same!")); @@ -1626,7 +1627,7 @@ TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, predict_values_rho_alpha_size_m const auto data = util::generate_random_matrix>(plssvm::shape{ 2, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), svm.num_available_devices()); // the number of rho values and weight vectors must be identical EXPECT_DEATH(std::ignore = svm.predict_values(params, support_vectors, weights, rho, w, data), ::testing::HasSubstr("The number of rho values (1) and the number of weight vectors (2) must be the same!")); @@ -1653,7 +1654,7 @@ TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, predict_values_w_size_mismatch) const auto data = util::generate_random_matrix>(plssvm::shape{ 2, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), svm.num_available_devices()); // 
the number of features and w values must be identical auto w = util::generate_random_matrix>(plssvm::shape{ 2, 3 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); @@ -1685,7 +1686,7 @@ TYPED_TEST_P(GenericCSVMKernelFunctionDeathTest, predict_values_num_features_mis const auto data = util::generate_random_matrix>(plssvm::shape{ 2, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows(), svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_rows(), svm.num_available_devices()); // the number of features for the support vectors and predict points must be identical EXPECT_DEATH(std::ignore = svm.predict_values(params, support_vectors, weights, rho, w, data), ::testing::HasSubstr("The number of features in the support vectors (5) must be the same as in the data points to predict (4)!")); @@ -1735,7 +1736,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunctionDeathTest, assemble_kernel_matrix_em const plssvm::real_type QA_cost = 42.0; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(A.num_rows() - 1, svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, A.num_rows() - 1, svm.num_available_devices()); const plssvm::soa_matrix empty_matr{}; const std::vector empty_vec{}; @@ -1771,7 +1772,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunctionDeathTest, assemble_kernel_matrix_A_ const plssvm::real_type QA_cost = 42.0; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(A.num_rows() - 1, svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, A.num_rows() - 1, svm.num_available_devices()); // the A matrix must be padded EXPECT_DEATH(std::ignore = svm.assemble_kernel_matrix(solver, params, A, q_red, QA_cost), 
::testing::HasSubstr("The matrix to setup on the devices must be padded!")); @@ -1802,7 +1803,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunctionDeathTest, assemble_kernel_matrix_si const plssvm::real_type QA_cost = 42.0; // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(A.num_rows() - 1, svm.num_available_devices()); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, A.num_rows() - 1, svm.num_available_devices()); // the A matrix must be padded EXPECT_DEATH(std::ignore = svm.assemble_kernel_matrix(solver, params, A, q_red, QA_cost), ::testing::HasSubstr("The q_red size (4) mismatches the number of data points after dimensional reduction (3)!")); diff --git a/tests/backends/generic_csvm_tests.hpp b/tests/backends/generic_csvm_tests.hpp index 4fb412db2..84b9b7ad9 100644 --- a/tests/backends/generic_csvm_tests.hpp +++ b/tests/backends/generic_csvm_tests.hpp @@ -15,9 +15,10 @@ #include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set -#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution +#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{triangular_data_distribution, rectangular_data_distribution} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape @@ -52,9 +53,11 @@ TYPED_TEST_P(GenericBackendCSVM, blas_level_3_kernel_explicit) { const plssvm::classification_data_set data{ PLSSVM_CLASSIFICATION_TEST_FILE }; const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data.data()); + // emulate two devices to ensure device_kernel_symm_mirror is called + const std::size_t num_devices = 2; 
+ // create correct data distribution for the ground truth calculation - const plssvm::detail::triangular_data_distribution dist{ data.num_data_points() - 1, 1 }; - const std::vector kernel_matrix = ground_truth::assemble_device_specific_kernel_matrix(params, data.data(), q_red, QA_cost, dist, 0); + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points() - 1, num_devices }; const auto B = util::generate_specific_matrix>(plssvm::shape{ data.num_data_points() - 1, data.num_data_points() - 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); @@ -64,14 +67,35 @@ TYPED_TEST_P(GenericBackendCSVM, blas_level_3_kernel_explicit) { const std::size_t num_rhs = B.shape().x; const std::size_t num_rows = B.shape().y; - device_kernel_symm(num_rows, num_rhs, alpha, kernel_matrix, B, beta, C); + + plssvm::soa_matrix C_res{ C.shape(), plssvm::real_type{ 0.0 }, C.padding() }; + + for (std::size_t device = 0; device < num_devices; ++device) { + // create kernel matrix + const std::vector kernel_matrix = ground_truth::assemble_device_specific_kernel_matrix(params, data.data(), q_red, QA_cost, dist, device); + + plssvm::soa_matrix C_temp{ C.shape(), plssvm::real_type{ 0.0 }, C.padding() }; + if (device == 0) { + C_temp = C; + } + + const std::size_t specific_num_rows = dist.place_specific_num_rows(device); + const std::size_t row_offset = dist.place_row_offset(device); + device_kernel_symm(num_rows, num_rhs, specific_num_rows, row_offset, alpha, kernel_matrix, B, beta, C_temp); + const std::size_t num_mirror_rows = num_rows - row_offset - specific_num_rows; + if (num_mirror_rows > 0) { + device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, specific_num_rows, row_offset, alpha, kernel_matrix, B, beta, C_temp); + } + + C_res += C_temp; + } // calculate correct results const plssvm::aos_matrix kernel_matrix_gemm_padded = ground_truth::assemble_full_kernel_matrix(params, data.data(), q_red, QA_cost); 
ground_truth::gemm(alpha, kernel_matrix_gemm_padded, B, beta, ground_truth_C); // check C for correctness - EXPECT_FLOATING_POINT_MATRIX_NEAR(C, ground_truth_C); + EXPECT_FLOATING_POINT_MATRIX_NEAR(C_res, ground_truth_C); } TYPED_TEST_P(GenericBackendCSVM, calculate_w) { @@ -83,7 +107,11 @@ TYPED_TEST_P(GenericBackendCSVM, calculate_w) { // calculate w plssvm::soa_matrix w{ plssvm::shape{ weights.num_rows(), data.data().num_cols() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; - device_kernel_w_linear(w, weights, data.data()); + + // create correct data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points(), 1 }; + + device_kernel_w_linear(w, weights, data.data(), dist.place_specific_num_rows(0), dist.place_row_offset(0)); // calculate correct results const plssvm::soa_matrix correct_w = ground_truth::calculate_w(weights, data.data()); @@ -120,31 +148,34 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunction, assemble_kernel_matrix_explicit) } // create correct data distribution for the ground truth calculation - const plssvm::detail::triangular_data_distribution dist{ data.num_data_points() - 1, 1 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points() - 1, 1 }; const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data_matr); const plssvm::real_type cost = plssvm::real_type{ 1.0 } / params.cost; std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + switch (kernel) { case plssvm::kernel_function_type::linear: - device_kernel_assembly(q_red, kernel_matrix, data_matr, QA_cost, cost); + device_kernel_assembly(kernel_matrix, data_matr, device_specific_num_rows, row_offset, 
q_red, QA_cost, cost); break; case plssvm::kernel_function_type::polynomial: - device_kernel_assembly(q_red, kernel_matrix, data_matr, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); + device_kernel_assembly(kernel_matrix, data_matr, device_specific_num_rows, row_offset, q_red, QA_cost, cost, params.degree, std::get(params.gamma), params.coef0); break; case plssvm::kernel_function_type::rbf: - device_kernel_assembly(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get(params.gamma)); + device_kernel_assembly(kernel_matrix, data_matr, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); break; case plssvm::kernel_function_type::sigmoid: - device_kernel_assembly(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get(params.gamma), params.coef0); + device_kernel_assembly(kernel_matrix, data_matr, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma), params.coef0); break; case plssvm::kernel_function_type::laplacian: - device_kernel_assembly(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get(params.gamma)); + device_kernel_assembly(kernel_matrix, data_matr, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); break; case plssvm::kernel_function_type::chi_squared: - device_kernel_assembly(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get(params.gamma)); + device_kernel_assembly(kernel_matrix, data_matr, device_specific_num_rows, row_offset, q_red, QA_cost, cost, std::get(params.gamma)); break; } const std::vector correct_kernel_matrix = ground_truth::assemble_device_specific_kernel_matrix(params, data_matr, q_red, QA_cost, dist, 0); @@ -155,6 +186,7 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunction, assemble_kernel_matrix_explicit) } TYPED_TEST_P(GenericBackendCSVMKernelFunction, blas_level_3_kernel_implicit) { + using namespace plssvm::operators; constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, 
TypeParam>; const plssvm::real_type alpha{ 1.0 }; @@ -180,24 +212,33 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunction, blas_level_3_kernel_implicit) { auto C = util::generate_specific_matrix>(plssvm::shape{ data.num_data_points() - 1, data.num_data_points() - 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); auto ground_truth_C{ C }; + // scale C + C *= beta; + + // create correct data distribution + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points() - 1, 1 }; + + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + switch (kernel) { case plssvm::kernel_function_type::linear: - device_kernel_assembly_symm(alpha, q_red, data_matr, QA_cost, cost, B, beta, C); + device_kernel_assembly_symm(alpha, q_red, data_matr, device_specific_num_rows, row_offset, QA_cost, cost, B, C); break; case plssvm::kernel_function_type::polynomial: - device_kernel_assembly_symm(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, params.degree, std::get(params.gamma), params.coef0); + device_kernel_assembly_symm(alpha, q_red, data_matr, device_specific_num_rows, row_offset, QA_cost, cost, B, C, params.degree, std::get(params.gamma), params.coef0); break; case plssvm::kernel_function_type::rbf: - device_kernel_assembly_symm(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get(params.gamma)); + device_kernel_assembly_symm(alpha, q_red, data_matr, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); break; case plssvm::kernel_function_type::sigmoid: - device_kernel_assembly_symm(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get(params.gamma), params.coef0); + device_kernel_assembly_symm(alpha, q_red, data_matr, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma), params.coef0); break; case plssvm::kernel_function_type::laplacian: - 
device_kernel_assembly_symm(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get(params.gamma)); + device_kernel_assembly_symm(alpha, q_red, data_matr, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); break; case plssvm::kernel_function_type::chi_squared: - device_kernel_assembly_symm(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get(params.gamma)); + device_kernel_assembly_symm(alpha, q_red, data_matr, device_specific_num_rows, row_offset, QA_cost, cost, B, C, std::get(params.gamma)); break; } @@ -230,24 +271,30 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunction, predict_values) { plssvm::aos_matrix out{ plssvm::shape{ predict_points.num_rows(), weights.num_rows() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + // create correct data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, predict_points.num_rows(), 1 }; + + const std::size_t device_specific_num_predict_points = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + switch (kernel) { case plssvm::kernel_function_type::linear: - device_kernel_predict_linear(out, correct_w, rho, predict_points); + device_kernel_predict_linear(out, correct_w, rho, predict_points, device_specific_num_predict_points, row_offset); break; case plssvm::kernel_function_type::polynomial: - device_kernel_predict(out, weights, rho, data_matr, predict_points, params.degree, std::get(params.gamma), params.coef0); + device_kernel_predict(out, weights, rho, data_matr, predict_points, device_specific_num_predict_points, row_offset, params.degree, std::get(params.gamma), params.coef0); break; case plssvm::kernel_function_type::rbf: - device_kernel_predict(out, weights, rho, data_matr, predict_points, std::get(params.gamma)); + device_kernel_predict(out, weights, rho, data_matr, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); break; case 
plssvm::kernel_function_type::sigmoid: - device_kernel_predict(out, weights, rho, data_matr, predict_points, std::get(params.gamma), params.coef0); + device_kernel_predict(out, weights, rho, data_matr, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma), params.coef0); break; case plssvm::kernel_function_type::laplacian: - device_kernel_predict(out, weights, rho, data_matr, predict_points, std::get(params.gamma)); + device_kernel_predict(out, weights, rho, data_matr, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); break; case plssvm::kernel_function_type::chi_squared: - device_kernel_predict(out, weights, rho, data_matr, predict_points, std::get(params.gamma)); + device_kernel_predict(out, weights, rho, data_matr, predict_points, device_specific_num_predict_points, row_offset, std::get(params.gamma)); break; } @@ -284,16 +331,52 @@ TYPED_TEST_P(GenericBackendCSVMDeathTest, blas_level_3_kernel_explicit) { const std::size_t num_rhs = B.shape().x; const std::size_t num_rows = B.shape().y; - // the A matrix must have the correct size - EXPECT_DEATH(device_kernel_symm(num_rows, num_rows, alpha, std::vector{}, B, beta, C), fmt::format("A matrix sizes mismatch!: 0 != {}", kernel_matrix.size())); + // create correct data distribution + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, num_rows, 1 }; + const std::size_t specific_num_rows = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + + { + // the A matrix must have the correct size + EXPECT_DEATH(device_kernel_symm(num_rows, num_rhs, specific_num_rows, row_offset, alpha, std::vector{}, B, beta, C), "A matrix may not be empty!"); + + // the B matrix must have the correct shape + const auto B_wrong = util::generate_random_matrix>(plssvm::shape{ std::min(0ULL, num_rows - 1), std::min(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + 
EXPECT_DEATH(device_kernel_symm(num_rows, num_rhs, specific_num_rows, row_offset, alpha, kernel_matrix, B_wrong, beta, C), ::testing::HasSubstr(fmt::format("B matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast(num_rows) - 1), std::min(0, static_cast(num_rhs) - 2), num_rows, num_rhs))); + + // the C matrix must have the correct shape + auto C_wrong = util::generate_random_matrix>(plssvm::shape{ std::min(0ULL, num_rows - 1), std::min(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_DEATH(device_kernel_symm(num_rows, num_rhs, specific_num_rows, row_offset, alpha, kernel_matrix, B, beta, C_wrong), ::testing::HasSubstr(fmt::format("C matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast(num_rows) - 1), std::min(0, static_cast(num_rhs) - 2), num_rows, num_rhs))); + + // the place specific number of rows may not be too large + EXPECT_DEATH(device_kernel_symm(num_rows, num_rhs, num_rows + 1, row_offset, alpha, kernel_matrix, B, beta, C), ::testing::HasSubstr(fmt::format("The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", num_rows + 1, num_rows))); + + // the row offset may not be too large + EXPECT_DEATH(device_kernel_symm(num_rows, num_rhs, specific_num_rows, num_rows + 1, alpha, kernel_matrix, B, beta, C), ::testing::HasSubstr(fmt::format("The row offset ({}) cannot be greater the the total number of rows ({})!", num_rows + 1, num_rows))); + } + { + const std::size_t num_mirror_rows = num_rows - row_offset - specific_num_rows; - // the B matrix must have the correct shape - const auto B_wrong = util::generate_random_matrix>(plssvm::shape{ std::min(0ULL, num_rows - 1), std::min(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); - EXPECT_DEATH(device_kernel_symm(num_rows, num_rows, alpha, kernel_matrix, B_wrong, beta, C), ::testing::HasSubstr(fmt::format("B matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, 
static_cast(num_rows) - 1), std::min(0, static_cast(num_rhs) - 2), num_rows, num_rhs))); + // the A matrix must have the correct size + EXPECT_DEATH(device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, specific_num_rows, row_offset, alpha, std::vector{}, B, beta, C), "A matrix may not be empty!"); - // the C matrix must have the correct shape - auto C_wrong = util::generate_random_matrix>(plssvm::shape{ std::min(0ULL, num_rows - 1), std::min(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); - EXPECT_DEATH(device_kernel_symm(num_rows, num_rows, alpha, kernel_matrix, B, beta, C_wrong), ::testing::HasSubstr(fmt::format("C matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast(num_rows) - 1), std::min(0, static_cast(num_rhs) - 2), num_rows, num_rhs))); + // the B matrix must have the correct shape + const auto B_wrong = util::generate_random_matrix>(plssvm::shape{ std::min(0ULL, num_rows - 1), std::min(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_DEATH(device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, specific_num_rows, row_offset, alpha, kernel_matrix, B_wrong, beta, C), ::testing::HasSubstr(fmt::format("B matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast(num_rows) - 1), std::min(0, static_cast(num_rhs) - 2), num_rows, num_rhs))); + + // the C matrix must have the correct shape + auto C_wrong = util::generate_random_matrix>(plssvm::shape{ std::min(0ULL, num_rows - 1), std::min(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_DEATH(device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, specific_num_rows, row_offset, alpha, kernel_matrix, B, beta, C_wrong), ::testing::HasSubstr(fmt::format("C matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast(num_rows) - 1), std::min(0, static_cast(num_rhs) - 2), num_rows, num_rhs))); + + // the place specific number of rows may 
not be too large + EXPECT_DEATH(device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, num_rows + 1, row_offset, alpha, kernel_matrix, B, beta, C), ::testing::HasSubstr(fmt::format("The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", num_rows + 1, num_rows))); + + // the mirror number of rows may not be too large + EXPECT_DEATH(device_kernel_symm_mirror(num_rows, num_rhs, num_rows + 1, specific_num_rows, row_offset, alpha, kernel_matrix, B, beta, C), ::testing::HasSubstr(fmt::format("The number of mirror rows ({}) cannot be greater the the total number of rows ({})!", num_rows + 1, num_rows))); + + // the row offset may not be too large + EXPECT_DEATH(device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, specific_num_rows, num_rows + 1, alpha, kernel_matrix, B, beta, C), ::testing::HasSubstr(fmt::format("The row offset ({}) cannot be greater the the total number of rows ({})!", num_rows + 1, num_rows))); + } } TYPED_TEST_P(GenericBackendCSVMDeathTest, calculate_w) { @@ -304,12 +387,24 @@ TYPED_TEST_P(GenericBackendCSVMDeathTest, calculate_w) { const auto weights = util::generate_specific_matrix>(plssvm::shape{ 3, data.num_data_points() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); plssvm::soa_matrix w(plssvm::shape{ weights.num_rows(), data.data().num_cols() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + // create correct data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points(), 1 }; + const std::size_t specific_num_rows = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + // the weights and support vector matrix shapes must match const auto weights_wrong = util::generate_specific_matrix>(plssvm::shape{ 3, data.num_data_points() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); - EXPECT_DEATH(device_kernel_w_linear(w, weights_wrong, 
data.data()), fmt::format("Size mismatch: {} vs {}!", weights_wrong.num_cols(), data.data().num_rows())); + EXPECT_DEATH(device_kernel_w_linear(w, weights_wrong, data.data(), specific_num_rows, row_offset), fmt::format("Size mismatch: {} vs {}!", weights_wrong.num_cols(), data.data().num_rows())); + // the w shape must be correct plssvm::soa_matrix w_wrong{}; - EXPECT_DEATH(device_kernel_w_linear(w_wrong, weights, data.data()), ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs [{}, {}]!", weights.num_rows(), data.data().num_cols()))); + EXPECT_DEATH(device_kernel_w_linear(w_wrong, weights, data.data(), specific_num_rows, row_offset), ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs [{}, {}]!", weights.num_rows(), data.data().num_cols()))); + + // the place specific number of rows may not be too large + EXPECT_DEATH(device_kernel_w_linear(w, weights, data.data(), data.num_data_points() + 1, row_offset), ::testing::HasSubstr(fmt::format("The number of place specific sv ({}) cannot be greater the the total number of sv ({})!", data.num_data_points() + 1, data.num_data_points()))); + + // the row offset may not be too large + EXPECT_DEATH(device_kernel_w_linear(w, weights, data.data(), specific_num_rows, data.num_data_points() + 1), ::testing::HasSubstr(fmt::format("The sv offset ({}) cannot be greater the the total number of sv ({})!", data.num_data_points() + 1, data.num_data_points()))); } REGISTER_TYPED_TEST_SUITE_P(GenericBackendCSVMDeathTest, @@ -336,47 +431,56 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunctionDeathTest, assemble_kernel_matrix_e const plssvm::classification_data_set data{ PLSSVM_CLASSIFICATION_TEST_FILE }; // create correct data distribution for the ground truth calculation - const plssvm::detail::triangular_data_distribution dist{ data.num_data_points() - 1, 1 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points() - 1, 1 }; const auto [q_red, QA_cost] = 
ground_truth::perform_dimensional_reduction(params, data.data()); // create correct data distribution for the ground truth calculation std::vector kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + // helper lambda to reduce the amount of needed switches! - const auto run_assembly = [=](const plssvm::parameter ¶ms_p, const std::vector &q_red_p, std::vector &kernel_matrix_p, const plssvm::soa_matrix &data_p, const plssvm::real_type QA_cost_p) { + const auto run_assembly = [=](const plssvm::parameter ¶ms_p, std::vector &kernel_matrix_p, const plssvm::soa_matrix &data_p, const std::size_t device_specific_num_rows_p, const std::size_t row_offset_p, const std::vector &q_red_p, const plssvm::real_type QA_cost_p) { switch (kernel) { case plssvm::kernel_function_type::linear: - device_kernel_assembly(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost); + device_kernel_assembly(kernel_matrix_p, data_p, device_specific_num_rows_p, row_offset_p, q_red_p, QA_cost_p, params_p.cost); break; case plssvm::kernel_function_type::polynomial: - device_kernel_assembly(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, params_p.degree, std::get(params_p.gamma), params_p.coef0); + device_kernel_assembly(kernel_matrix_p, data_p, device_specific_num_rows_p, row_offset_p, q_red_p, QA_cost_p, params_p.cost, params_p.degree, std::get(params_p.gamma), params_p.coef0); break; case plssvm::kernel_function_type::rbf: - device_kernel_assembly(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get(params_p.gamma)); + device_kernel_assembly(kernel_matrix_p, data_p, device_specific_num_rows_p, row_offset_p, q_red_p, QA_cost_p, params_p.cost, std::get(params_p.gamma)); break; case plssvm::kernel_function_type::sigmoid: - device_kernel_assembly(q_red_p, 
kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get(params_p.gamma), params_p.coef0); + device_kernel_assembly(kernel_matrix_p, data_p, device_specific_num_rows_p, row_offset_p, q_red_p, QA_cost_p, params_p.cost, std::get(params_p.gamma), params_p.coef0); break; case plssvm::kernel_function_type::laplacian: - device_kernel_assembly(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get(params_p.gamma)); + device_kernel_assembly(kernel_matrix_p, data_p, device_specific_num_rows_p, row_offset_p, q_red_p, QA_cost_p, params_p.cost, std::get(params_p.gamma)); break; case plssvm::kernel_function_type::chi_squared: - device_kernel_assembly(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get(params_p.gamma)); + device_kernel_assembly(kernel_matrix_p, data_p, device_specific_num_rows_p, row_offset_p, q_red_p, QA_cost_p, params_p.cost, std::get(params_p.gamma)); break; } }; // check q_red size (must be equal to the number of data points - 1 - EXPECT_DEATH(run_assembly(params, std::vector{}, kernel_matrix, data.data(), QA_cost), fmt::format("Sizes mismatch!: 0 != {}", data.num_data_points() - 1)); + EXPECT_DEATH(run_assembly(params, kernel_matrix, data.data(), device_specific_num_rows, row_offset, std::vector{}, QA_cost), fmt::format("Sizes mismatch!: 0 != {}", data.num_data_points() - 1)); // check the kernel matrix size (depending on the usage of GEMM/SYMM) std::vector ret; - EXPECT_DEATH(run_assembly(params, q_red, ret, data.data(), QA_cost), ::testing::HasSubstr(fmt::format("Sizes mismatch (SYMM)!: 0 != {}", kernel_matrix.size()))); + EXPECT_DEATH(run_assembly(params, ret, data.data(), device_specific_num_rows, row_offset, q_red, QA_cost), "A matrix may not be empty!"); + + // check place specific number of rows + EXPECT_DEATH(run_assembly(params, kernel_matrix, data.data(), q_red.size() + 1, row_offset, q_red, QA_cost), ::testing::HasSubstr(fmt::format("The number of place specific rows ({}) cannot be greater the the total number of 
rows ({})!", q_red.size() + 1, q_red.size()))); + + // check the row offset + EXPECT_DEATH(run_assembly(params, kernel_matrix, data.data(), device_specific_num_rows, q_red.size() + 1, q_red, QA_cost), ::testing::HasSubstr(fmt::format("The row offset ({}) cannot be greater the the total number of rows ({})!", q_red.size() + 1, q_red.size()))); // cost must not be 0.0 since 1.0 / cost is used params.cost = plssvm::real_type{ 0.0 }; - EXPECT_DEATH(run_assembly(params, q_red, kernel_matrix, data.data(), QA_cost), "cost must not be 0.0 since it is 1 / plssvm::cost!"); + EXPECT_DEATH(run_assembly(params, kernel_matrix, data.data(), device_specific_num_rows, row_offset, q_red, QA_cost), "cost must not be 0.0 since it is 1 / plssvm::cost!"); } TYPED_TEST_P(GenericBackendCSVMKernelFunctionDeathTest, blas_level_3_kernel_implicit) { @@ -397,46 +501,61 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunctionDeathTest, blas_level_3_kernel_impl const plssvm::real_type beta{ 1.0 }; plssvm::soa_matrix C{ B }; + // scale C + C *= beta; + + // create correct data distribution + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, data.num_data_points() - 1, 1 }; + + const std::size_t device_specific_num_rows = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + // helper lambda to reduce the amount of needed switches! 
- const auto run_assembly_symm = [=](const plssvm::parameter ¶ms_p, const std::vector &q_red_p, const plssvm::soa_matrix &data_p, const plssvm::soa_matrix &B_p, plssvm::soa_matrix &C_p) { + const auto run_assembly_symm = [=](const plssvm::parameter ¶ms_p, const std::vector &q_red_p, const plssvm::soa_matrix &data_p, const std::size_t device_specific_num_rows_p, const std::size_t row_offset_p, const plssvm::soa_matrix &B_p, plssvm::soa_matrix &C_p) { switch (kernel) { case plssvm::kernel_function_type::linear: - device_kernel_assembly_symm(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p); + device_kernel_assembly_symm(alpha, q_red_p, data_p, device_specific_num_rows_p, row_offset_p, QA_cost, params_p.cost, B_p, C_p); break; case plssvm::kernel_function_type::polynomial: - device_kernel_assembly_symm(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, params_p.degree, std::get(params_p.gamma), params_p.coef0); + device_kernel_assembly_symm(alpha, q_red_p, data_p, device_specific_num_rows_p, row_offset_p, QA_cost, params_p.cost, B_p, C_p, params_p.degree, std::get(params_p.gamma), params_p.coef0); break; case plssvm::kernel_function_type::rbf: - device_kernel_assembly_symm(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get(params_p.gamma)); + device_kernel_assembly_symm(alpha, q_red_p, data_p, device_specific_num_rows_p, row_offset_p, QA_cost, params_p.cost, B_p, C_p, std::get(params_p.gamma)); break; case plssvm::kernel_function_type::sigmoid: - device_kernel_assembly_symm(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get(params_p.gamma), params_p.coef0); + device_kernel_assembly_symm(alpha, q_red_p, data_p, device_specific_num_rows_p, row_offset_p, QA_cost, params_p.cost, B_p, C_p, std::get(params_p.gamma), params_p.coef0); break; case plssvm::kernel_function_type::laplacian: - device_kernel_assembly_symm(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get(params_p.gamma)); 
+ device_kernel_assembly_symm(alpha, q_red_p, data_p, device_specific_num_rows_p, row_offset_p, QA_cost, params_p.cost, B_p, C_p, std::get(params_p.gamma)); break; case plssvm::kernel_function_type::chi_squared: - device_kernel_assembly_symm(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get(params_p.gamma)); + device_kernel_assembly_symm(alpha, q_red_p, data_p, device_specific_num_rows_p, row_offset_p, QA_cost, params_p.cost, B_p, C_p, std::get(params_p.gamma)); break; } }; // check q_red size (must be equal to the number of data points - 1 - EXPECT_DEATH(run_assembly_symm(params, std::vector{}, data.data(), B, C), fmt::format("Sizes mismatch!: 0 != {}", data.num_data_points() - 1)); + EXPECT_DEATH(run_assembly_symm(params, std::vector{}, data.data(), device_specific_num_rows, row_offset, B, C), fmt::format("Sizes mismatch!: 0 != {}", data.num_data_points() - 1)); + + // check place specific number of rows + EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), q_red.size() + 1, row_offset, B, C), ::testing::HasSubstr(fmt::format("The number of place specific rows ({}) cannot be greater the the total number of rows ({})!", q_red.size() + 1, q_red.size()))); + + // check the row offset + EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), device_specific_num_rows, q_red.size() + 1, B, C), ::testing::HasSubstr(fmt::format("The row offset ({}) cannot be greater the the total number of rows ({})!", q_red.size() + 1, q_red.size()))); // cost must not be 0.0 since 1.0 / cost is used plssvm::parameter params2{ params }; params2.cost = plssvm::real_type{ 0.0 }; - EXPECT_DEATH(run_assembly_symm(params2, q_red, data.data(), B, C), "cost must not be 0.0 since it is 1 / plssvm::cost!"); + EXPECT_DEATH(run_assembly_symm(params2, q_red, data.data(), device_specific_num_rows, row_offset, B, C), "cost must not be 0.0 since it is 1 / plssvm::cost!"); // B and C must be of the same shape B = plssvm::soa_matrix{ plssvm::shape{ 1, 1 } }; - 
EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), B, C), "The matrices B and C must have the same shape!"); + EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), device_specific_num_rows, row_offset, B, C), "The matrices B and C must have the same shape!"); // the number of columns in B must match the number of rows in the data set - 1 B = plssvm::soa_matrix{ plssvm::shape{ data.num_classes(), data.num_data_points() - 2 } }; C = B; - EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), B, C), ::testing::HasSubstr(fmt::format("The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), data.num_data_points() - 1))); + EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), device_specific_num_rows, row_offset, B, C), ::testing::HasSubstr(fmt::format("The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), data.num_data_points() - 1))); } TYPED_TEST_P(GenericBackendCSVMKernelFunctionDeathTest, predict_values) { @@ -455,43 +574,56 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunctionDeathTest, predict_values) { plssvm::aos_matrix out{ plssvm::shape{ predict_points.num_rows(), weights.num_rows() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + // create correct data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, predict_points.num_rows(), 1 }; + const std::size_t device_specific_num_predict_points = dist.place_specific_num_rows(0); + const std::size_t row_offset = dist.place_row_offset(0); + if constexpr (kernel == plssvm::kernel_function_type::linear) { // the number of classes must match std::vector rho_wrong = util::generate_random_vector(weights.num_rows()); rho_wrong.pop_back(); - EXPECT_DEATH(device_kernel_predict_linear(out, w, rho_wrong, predict_points), + EXPECT_DEATH(device_kernel_predict_linear(out, w, rho_wrong, predict_points, device_specific_num_predict_points, row_offset), 
::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", w.num_rows(), rho_wrong.size()))); // the number of features must match const auto predict_points_wrong = util::generate_specific_matrix>(plssvm::shape{ data.data().num_rows(), data.data().num_cols() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); - EXPECT_DEATH(device_kernel_predict_linear(out, w, rho, predict_points_wrong), + EXPECT_DEATH(device_kernel_predict_linear(out, w, rho, predict_points_wrong, device_specific_num_predict_points, row_offset), ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", w.num_cols(), predict_points_wrong.num_cols()))); // the output shape must match plssvm::aos_matrix out_wrong{}; - EXPECT_DEATH(device_kernel_predict_linear(out_wrong, w, rho, predict_points), + EXPECT_DEATH(device_kernel_predict_linear(out_wrong, w, rho, predict_points, device_specific_num_predict_points, row_offset), ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs {}!", (plssvm::shape{ predict_points.num_rows(), w.num_rows() })))); + + // the place specific number of rows may not be too large + EXPECT_DEATH(device_kernel_predict_linear(out, w, rho, predict_points, predict_points.num_rows() + 1, row_offset), + ::testing::HasSubstr(fmt::format("The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", predict_points.num_rows() + 1, predict_points.num_rows()))); + + // the row offset may not be too large + EXPECT_DEATH(device_kernel_predict_linear(out, w, rho, predict_points, device_specific_num_predict_points, predict_points.num_rows() + 1), + ::testing::HasSubstr(fmt::format("The row offset ({}) cannot be greater the the total number of predict points ({})!", predict_points.num_rows() + 1, predict_points.num_rows()))); } else { // helper lambda to reduce the amount of needed switches! 
- const auto run_predict_values = [=](const plssvm::parameter ¶ms_p, plssvm::aos_matrix &out_p, const plssvm::aos_matrix &weights_p, const std::vector &rho_p, const plssvm::soa_matrix &support_vectors_p, const plssvm::soa_matrix &predict_points_p) { + const auto run_predict_values = [=](const plssvm::parameter ¶ms_p, plssvm::aos_matrix &out_p, const plssvm::aos_matrix &weights_p, const std::vector &rho_p, const plssvm::soa_matrix &support_vectors_p, const plssvm::soa_matrix &predict_points_p, const std::size_t device_specific_num_predict_points_p, const std::size_t row_offset_p) { switch (kernel) { case plssvm::kernel_function_type::linear: // unreachable break; case plssvm::kernel_function_type::polynomial: - device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, params_p.degree, std::get(params_p.gamma), params_p.coef0); + device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, device_specific_num_predict_points_p, row_offset_p, params_p.degree, std::get(params_p.gamma), params_p.coef0); break; case plssvm::kernel_function_type::rbf: - device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get(params_p.gamma)); + device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, device_specific_num_predict_points_p, row_offset_p, std::get(params_p.gamma)); break; case plssvm::kernel_function_type::sigmoid: - device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get(params_p.gamma), params_p.coef0); + device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, device_specific_num_predict_points_p, row_offset_p, std::get(params_p.gamma), params_p.coef0); break; case plssvm::kernel_function_type::laplacian: - device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get(params_p.gamma)); + device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, 
device_specific_num_predict_points_p, row_offset_p, std::get(params_p.gamma)); break; case plssvm::kernel_function_type::chi_squared: - device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get(params_p.gamma)); + device_kernel_predict(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, device_specific_num_predict_points_p, row_offset_p, std::get(params_p.gamma)); break; } }; @@ -499,23 +631,31 @@ TYPED_TEST_P(GenericBackendCSVMKernelFunctionDeathTest, predict_values) { // the number of classes must match std::vector rho_wrong = util::generate_random_vector(weights.num_rows()); rho_wrong.pop_back(); - EXPECT_DEATH(run_predict_values(params, out, weights, rho_wrong, data.data(), predict_points), + EXPECT_DEATH(run_predict_values(params, out, weights, rho_wrong, data.data(), predict_points, device_specific_num_predict_points, row_offset), ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", w.num_rows(), rho_wrong.size()))); // the number of support vectors and weights must match const auto weights_wrong = util::generate_specific_matrix>(plssvm::shape{ 3, data.data().num_rows() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); - EXPECT_DEATH(run_predict_values(params, out, weights_wrong, rho, data.data(), predict_points), + EXPECT_DEATH(run_predict_values(params, out, weights_wrong, rho, data.data(), predict_points, device_specific_num_predict_points, row_offset), ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", weights_wrong.num_cols(), data.data().num_rows()))); // the number of features must match const auto predict_points_wrong = util::generate_specific_matrix>(plssvm::shape{ data.data().num_rows(), data.data().num_cols() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); - EXPECT_DEATH(run_predict_values(params, out, weights, rho, data.data(), predict_points_wrong), + EXPECT_DEATH(run_predict_values(params, out, weights, rho, data.data(), predict_points_wrong, 
device_specific_num_predict_points, row_offset), ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", data.data().num_cols(), predict_points_wrong.num_cols()))); // the output shape must match plssvm::aos_matrix out_wrong{}; - EXPECT_DEATH(run_predict_values(params, out_wrong, weights, rho, data.data(), predict_points), + EXPECT_DEATH(run_predict_values(params, out_wrong, weights, rho, data.data(), predict_points, device_specific_num_predict_points, row_offset), ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs {}!", (plssvm::shape{ predict_points.num_rows(), w.num_rows() })))); + + // the place specific number of rows may not be too large + EXPECT_DEATH(run_predict_values(params, out, weights, rho, data.data(), predict_points, predict_points.num_rows() + 1, row_offset), + ::testing::HasSubstr(fmt::format("The number of place specific predict points ({}) cannot be greater the the total number of predict points ({})!", predict_points.num_rows() + 1, predict_points.num_rows()))); + + // the row offset may not be too large + EXPECT_DEATH(run_predict_values(params, out, weights, rho, data.data(), predict_points, device_specific_num_predict_points, predict_points.num_rows() + 1), + ::testing::HasSubstr(fmt::format("The row offset ({}) cannot be greater the the total number of predict points ({})!", predict_points.num_rows() + 1, predict_points.num_rows()))); } } diff --git a/tests/backends/generic_gpu_csvm_tests.hpp b/tests/backends/generic_gpu_csvm_tests.hpp index c7eb62db1..b68453801 100644 --- a/tests/backends/generic_gpu_csvm_tests.hpp +++ b/tests/backends/generic_gpu_csvm_tests.hpp @@ -18,6 +18,7 @@ #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{triangular_data_distribution, rectangular_data_distribution} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include 
"plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape @@ -86,7 +87,7 @@ TYPED_TEST_P(GenericGPUCSVM, run_blas_level_3_kernel_explicit) { const mock_csvm_type svm = util::construct_from_tuple(csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_data_points() - 1, num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_data_points() - 1, num_devices); const plssvm::real_type alpha{ 1.0 }; const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data.data()); @@ -176,7 +177,7 @@ TYPED_TEST_P(GenericGPUCSVM, run_w_kernel) { const mock_csvm_type svm = util::construct_from_tuple(csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_data_points(), num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_data_points(), num_devices); for (std::size_t device_id = 0; device_id < num_devices; ++device_id) { SCOPED_TRACE(fmt::format("device_id {} ({}/{})", device_id, device_id + 1, num_devices)); @@ -370,7 +371,7 @@ TYPED_TEST_P(GenericGPUCSVMKernelFunction, run_assemble_kernel_matrix_explicit) const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_data_points() - 1, num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_data_points() - 1, num_devices); // perform dimensional reduction const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data_matr); @@ -445,7 +446,7 @@ 
TYPED_TEST_P(GenericGPUCSVMKernelFunction, run_assemble_kernel_matrix_implicit_b const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_data_points() - 1, num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, data.num_data_points() - 1, num_devices); // perform dimensional reduction const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data_matr); @@ -539,7 +540,7 @@ TYPED_TEST_P(GenericGPUCSVMKernelFunction, run_predict_kernel) { const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(predict_points.num_rows(), num_devices); + svm.data_distribution_ = std::make_unique(plssvm::mpi::communicator{}, predict_points.num_rows(), num_devices); for (std::size_t device_id = 0; device_id < num_devices; ++device_id) { SCOPED_TRACE(fmt::format("device_id {} ({}/{})", device_id, device_id + 1, num_devices)); diff --git a/tests/backends/stdpar/CMakeLists.txt b/tests/backends/stdpar/CMakeLists.txt index ad41ef59d..3998d63ae 100644 --- a/tests/backends/stdpar/CMakeLists.txt +++ b/tests/backends/stdpar/CMakeLists.txt @@ -9,14 +9,6 @@ if (PLSSVM_ENABLE_ASSERTS AND PLSSVM_STDPAR_BACKEND) message(FATAL_ERROR "GTest's death tests not supported with our stdpar backend. Please set \"PLSSVM_ENABLE_ASSERTS=OFF\"!") endif () -# performance tracker not supported with NVHPC -if (PLSSVM_ENABLE_PERFORMANCE_TRACKING AND PLSSVM_STDPAR_BACKEND AND PLSSVM_STDPAR_BACKEND STREQUAL "NVHPC") - message( - FATAL_ERROR - "The (global) performance tracker is currently not supported with GTest and NVHPC. 
Please set \"PLSSVM_ENABLE_PERFORMANCE_TRACKING=OFF\" or use another stdpar implementation!" - ) -endif () - # create stdpar tests set(PLSSVM_STDPAR_TEST_NAME stdpar_tests) @@ -54,6 +46,26 @@ include(GoogleTest) include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) discover_tests_with_death_test_filter(${PLSSVM_STDPAR_TEST_NAME}) +# add minimal run test for the classification task with the stdpar backend +add_test(NAME MainTrainClassificationStdpar/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 0 -b stdpar "${PLSSVM_CLASSIFICATION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.model" +) +add_test(NAME MainPredictClassificationStdpar/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b stdpar "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/classification/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/classification/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_classification.libsvm.predict" # predict file (result) +) + +# add minimal run test for the regression task with the stdpar backend +add_test(NAME MainTrainRegressionStdpar/executable_minimal COMMAND ${PLSSVM_EXECUTABLE_TRAIN_NAME} -s 1 -b stdpar "${PLSSVM_REGRESSION_TEST_FILE}" + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.model" +) +add_test(NAME MainPredictRegressionStdpar/executable_minimal + COMMAND ${PLSSVM_EXECUTABLE_PREDICT_NAME} -b stdpar "${CMAKE_CURRENT_LIST_DIR}/../../data/libsvm/regression/5x4.libsvm" # test file + "${CMAKE_CURRENT_LIST_DIR}/../../data/model/regression/5x4.libsvm.model" # model file + "${CMAKE_CURRENT_BINARY_DIR}/test_regression.libsvm.predict" # predict file (result) +) + # add test as coverage dependency if (TARGET coverage) add_dependencies(coverage ${PLSSVM_STDPAR_TEST_NAME}) diff --git a/tests/backends/stdpar/mock_stdpar_csvm.hpp b/tests/backends/stdpar/mock_stdpar_csvm.hpp index 32f019184..906a8ea2a 100644 --- 
a/tests/backends/stdpar/mock_stdpar_csvm.hpp +++ b/tests/backends/stdpar/mock_stdpar_csvm.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/backends/stdpar/csvm.hpp" // plssvm::stdpar::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm/csvm.hpp" // plssvm::csvm /** @@ -25,7 +26,7 @@ class mock_stdpar_csvm final : public plssvm::stdpar::csvm { public: template explicit mock_stdpar_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... }, + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... }, base_type{} { } // make protected member functions public diff --git a/tests/backends/stdpar/stdpar_csvm.cpp b/tests/backends/stdpar/stdpar_csvm.cpp index 10a566bf4..83c881229 100644 --- a/tests/backends/stdpar/stdpar_csvm.cpp +++ b/tests/backends/stdpar/stdpar_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backend_types.hpp" // plssvm::csvm_to_backend_type_v #include "plssvm/backends/stdpar/csvm.hpp" // plssvm::stdpar::{csvm, csvc, csvr} #include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception -#include "plssvm/backends/stdpar/kernel/cg_explicit/blas.hpp" // plssvm::stdpar::device_kernel_symm +#include "plssvm/backends/stdpar/kernel/cg_explicit/blas.hpp" // plssvm::stdpar::{device_kernel_symm, device_ce_kernel_symm_mirror} #include "plssvm/backends/stdpar/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::stdpar::device_kernel_assembly #include "plssvm/backends/stdpar/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::stdpar::device_kernel_assembly_symm #include "plssvm/backends/stdpar/kernel/predict_kernel.hpp" // plssvm::stdpar::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} @@ -99,6 +99,7 @@ using plssvm::stdpar::detail::device_kernel_assembly_symm; using plssvm::stdpar::detail::device_kernel_predict; using plssvm::stdpar::detail::device_kernel_predict_linear; using plssvm::stdpar::detail::device_kernel_symm; +using 
plssvm::stdpar::detail::device_kernel_symm_mirror; using plssvm::stdpar::detail::device_kernel_w_linear; #include "tests/backends/generic_csvm_tests.hpp" // generic backend C-SVM tests to instantiate diff --git a/tests/classification_report.cpp b/tests/classification_report.cpp index dbcb860bd..ff5c2afd7 100644 --- a/tests/classification_report.cpp +++ b/tests/classification_report.cpp @@ -87,17 +87,17 @@ TEST_F(ZeroDivisionBehavior, sanitize_nan_one) { EXPECT_EQ(plssvm::detail::sanitize_nan(42.0, 1.0, plssvm::classification_report::zero_division_behavior::one, "Foo"), 42.0); } -// TEST_F(ZeroDivisionBehavior, sanitize_nan_nan) { -// // sanitize NaN using nan -// #if !defined(PLSSVM_USE_FAST_MATH) || defined(_MSC_VER) -// // ATTENTION: MSVC doesn't optimize out the NaN check even if fast math is used -// EXPECT_TRUE(std::isnan(plssvm::detail::sanitize_nan(42.0, 0.0, plssvm::classification_report::zero_division_behavior::nan, "Foo"))); -// #else -// // ATTENTION: std::isnan will ALWAYS return false due to -ffast-math being enabled in release mode (in GCC and clang) -// EXPECT_FALSE(std::isnan(plssvm::detail::sanitize_nan(42.0, 0.0, plssvm::classification_report::zero_division_behavior::nan, "Foo"))); -// #endif -// EXPECT_EQ(plssvm::detail::sanitize_nan(42.0, 1.0, plssvm::classification_report::zero_division_behavior::nan, "Foo"), 42.0); -// } +TEST_F(ZeroDivisionBehavior, sanitize_nan_nan) { + // sanitize NaN using nan + // #if !defined(PLSSVM_USE_FAST_MATH) || defined(_MSC_VER) + // // ATTENTION: MSVC doesn't optimize out the NaN check even if fast math is used + // EXPECT_TRUE(std::isnan(plssvm::detail::sanitize_nan(42.0, 0.0, plssvm::classification_report::zero_division_behavior::nan, "Foo"))); + // #else + // // ATTENTION: std::isnan will ALWAYS return false due to -ffast-math being enabled in release mode (in GCC and clang) + // EXPECT_FALSE(std::isnan(plssvm::detail::sanitize_nan(42.0, 0.0, plssvm::classification_report::zero_division_behavior::nan, "Foo"))); 
+ // #endif + EXPECT_EQ(plssvm::detail::sanitize_nan(42.0, 1.0, plssvm::classification_report::zero_division_behavior::nan, "Foo"), 42.0); +} //*************************************************************************************************************************************// // metrics // diff --git a/tests/classification_types.cpp b/tests/classification_types.cpp index 281b8edc4..bcb5c326c 100644 --- a/tests/classification_types.cpp +++ b/tests/classification_types.cpp @@ -82,6 +82,11 @@ TEST(ClassificationType, calculate_number_of_classifiers) { EXPECT_EQ(calculate_number_of_classifiers(plssvm::classification_type::oao, 42), 861); } +TEST(ClassificationType, calculate_number_of_classifiers_unknown) { + // should return 0 if the provided classification_type is invalid + EXPECT_EQ(calculate_number_of_classifiers(static_cast(2), 2), 0); +} + TEST(ClassificationTypeDeathTest, too_few_classes) { // at least two classes must be provided EXPECT_DEATH(std::ignore = plssvm::calculate_number_of_classifiers(plssvm::classification_type::oaa, 1), "At least two classes must be given!"); diff --git a/tests/data/model/regression/invalid/wrong_nr_class.libsvm.model b/tests/data/model/regression/invalid/wrong_nr_class.libsvm.model new file mode 100644 index 000000000..f40291c55 --- /dev/null +++ b/tests/data/model/regression/invalid/wrong_nr_class.libsvm.model @@ -0,0 +1,12 @@ +svm_type c_svr +kernel_type linear +nr_class 3 +total_sv 6 +rho 0.32260160011873423 +SV +-1.8568721894e-01 1:-1.1178275006e+00 2:-2.9087188881e+00 3:6.6638344270e-01 4:1.0978832704e+00 +9.0116552290e-01 1:-5.2821182989e-01 2:-3.3588098497e-01 3:5.1687296030e-01 4:5.4604461446e-01 +-2.2483112395e-01 1:5.7650218263e-01 2:1.0140559662e+00 3:1.3009428080e-01 4:7.2619138869e-01 +1.4909749921e-02 1:1.8849404372e+00 2:1.0051856432e+00 3:2.9849993305e-01 4:1.6464627049e+00 +-4.5666857706e-01 1:-2.0981208921e-01 2:6.0276937379e-01 3:-1.3086851759e-01 4:1.0805254527e-01 +-4.8888352876e-02 1:-1.1256816276e+00 
2:2.1254153434e+00 3:-1.6512657655e-01 4:2.5164553141e+00 diff --git a/tests/data/model/regression/invalid/wrong_num_rho.libsvm.model b/tests/data/model/regression/invalid/wrong_num_rho.libsvm.model new file mode 100644 index 000000000..220c30035 --- /dev/null +++ b/tests/data/model/regression/invalid/wrong_num_rho.libsvm.model @@ -0,0 +1,12 @@ +svm_type c_svr +kernel_type linear +nr_class 2 +total_sv 6 +rho 0.32260160011873423 0.32260160011873423 +SV +-1.8568721894e-01 1:-1.1178275006e+00 2:-2.9087188881e+00 3:6.6638344270e-01 4:1.0978832704e+00 +9.0116552290e-01 1:-5.2821182989e-01 2:-3.3588098497e-01 3:5.1687296030e-01 4:5.4604461446e-01 +-2.2483112395e-01 1:5.7650218263e-01 2:1.0140559662e+00 3:1.3009428080e-01 4:7.2619138869e-01 +1.4909749921e-02 1:1.8849404372e+00 2:1.0051856432e+00 3:2.9849993305e-01 4:1.6464627049e+00 +-4.5666857706e-01 1:-2.0981208921e-01 2:6.0276937379e-01 3:-1.3086851759e-01 4:1.0805254527e-01 +-4.8888352876e-02 1:-1.1256816276e+00 2:2.1254153434e+00 3:-1.6512657655e-01 4:2.5164553141e+00 diff --git a/tests/data/scaling_factors/invalid/feature_index_more_than_once.txt b/tests/data/scaling_factors/invalid/feature_index_more_than_once.txt new file mode 100644 index 000000000..5d59e1759 --- /dev/null +++ b/tests/data/scaling_factors/invalid/feature_index_more_than_once.txt @@ -0,0 +1,7 @@ +# this is a comment that will be ignored! 
+x +-1.4 2.6 +1 0.0 1.0 +1 1.1 2.1 +2 3.3 4.3 +3 4.4 5.4 \ No newline at end of file diff --git a/tests/data_set/classification/constructors.cpp b/tests/data_set/classification/constructors.cpp index d566e6384..d6213df9b 100644 --- a/tests/data_set/classification/constructors.cpp +++ b/tests/data_set/classification/constructors.cpp @@ -11,9 +11,10 @@ #include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE #include "plssvm/data_set/classification_data_set.hpp" // data set class to test #include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler -#include "plssvm/exceptions/exceptions.hpp" // plssvm::data_set_exception +#include "plssvm/exceptions/exceptions.hpp" // plssvm::data_set_exception, plssvm::mpi_exception #include "plssvm/file_format_types.hpp" // plssvm::file_format_type #include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/shape.hpp" // plssvm::shape #include "tests/custom_test_macros.hpp" // EXPECT_FLOATING_POINT_MATRIX_EQ, EXPECT_FLOATING_POINT_MATRIX_NEAR, EXPECT_FLOATING_POINT_NEAR, EXPECT_THROW_WHAT @@ -21,6 +22,10 @@ #include "tests/types_to_test.hpp" // util::{classification_label_type_gtest, classification_label_type_layout_type_gtest, test_parameter_type_at_t, test_parameter_value_at_v} #include "tests/utility.hpp" // util::{redirect_output, temporary_file, instantiate_template_file, get_distinct_label, get_correct_data_file_labels, generate_specific_matrix, scale} +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_COMM_WORLD, MPI_Comm_dup, MPI_Comm_free +#endif + #include "gtest/gtest.h" // TYPED_TEST, TYPED_TEST_SUITE, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE; ASSERT_TRUE, FAIL, ::testing::{Test, StaticAssertTypeEq} #include // std::size_t @@ -265,6 +270,30 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_arff_from_file) { } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + 
+TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_arff_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // must append .arff to filename so that the correct function is called in the data_set constructor + this->append_to_filename(".arff"); + + // create data set + util::instantiate_template_file(PLSSVM_TEST_PATH "/data/arff/classification/6x4_TEMPLATE.arff", this->filename); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, this->filename, { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_libsvm_from_file) { using label_type = typename TestFixture::fixture_label_type; @@ -300,6 +329,27 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_libsvm_from_file) } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_libsvm_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + util::instantiate_template_file(PLSSVM_TEST_PATH "/data/libsvm/classification/6x4_TEMPLATE.libsvm", this->filename); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, this->filename, { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be 
identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_explicit_arff_from_file) { using label_type = typename TestFixture::fixture_label_type; @@ -335,6 +385,27 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_explicit_arff_fro } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_explicit_arff_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + util::instantiate_template_file(PLSSVM_TEST_PATH "/data/arff/classification/6x4_TEMPLATE.arff", this->filename); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, this->filename, plssvm::file_format_type::arff, { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_explicit_libsvm_from_file) { using label_type = typename TestFixture::fixture_label_type; @@ -370,6 +441,27 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_explicit_libsvm_f } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_explicit_libsvm_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + util::instantiate_template_file(PLSSVM_TEST_PATH 
"/data/libsvm/classification/6x4_TEMPLATE.libsvm", this->filename); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, this->filename, plssvm::file_format_type::libsvm, { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + //*************************************************************************************************************************************// // construct from 2D vector // //*************************************************************************************************************************************// @@ -460,6 +552,17 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_from_vector_with_label) EXPECT_FALSE(data.scaling_factors().has_value()); } +TYPED_TEST(ClassificationDataSetConstructors, construct_from_empty_vector_and_labels) { + using label_type = typename TestFixture::fixture_label_type; + + const std::vector labels = util::get_correct_data_file_labels(); + + // creating a data set from an empty vector is illegal + EXPECT_THROW_WHAT((plssvm::classification_data_set{ std::vector>{}, labels }), + plssvm::data_set_exception, + "Data vector is empty!"); +} + TYPED_TEST(ClassificationDataSetConstructors, construct_from_vector_mismatching_num_data_points_and_labels) { using label_type = typename TestFixture::fixture_label_type; @@ -506,6 +609,27 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_from_vector_witho } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_from_vector_without_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // 
create data points + const auto data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, data_points.to_2D_vector(), plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_from_vector_with_label) { using label_type = typename TestFixture::fixture_label_type; @@ -542,6 +666,29 @@ TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_from_vector_with_ } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetConstructors, construct_scaled_from_vector_with_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const std::vector different_labels = util::get_distinct_label(); + const std::vector labels = util::get_correct_data_file_labels(); + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, correct_data_points.to_2D_vector(), labels, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + 
//*************************************************************************************************************************************// // construct from matrix // //*************************************************************************************************************************************// @@ -685,6 +832,34 @@ TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_from_matrix_with_l EXPECT_FALSE(data.scaling_factors().has_value()); } +TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_from_empty_matrix_with_label) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create data points and labels + const std::vector labels = util::get_correct_data_file_labels(); + const plssvm::matrix data_points{ plssvm::shape{ 0, 0 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + + // creating a data set from an empty vector is illegal + EXPECT_THROW_WHAT((plssvm::classification_data_set{ data_points, labels }), + plssvm::data_set_exception, + "Data vector is empty!"); +} + +TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_from_matrix_with_label_size_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create data points and labels + const std::vector labels = util::get_correct_data_file_labels(); + const plssvm::matrix data_points{ plssvm::shape{ labels.size() - 1, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + + // creating a data set from an empty vector is illegal + EXPECT_THROW_WHAT_MATCHER((plssvm::classification_data_set{ data_points, labels }), + plssvm::data_set_exception, + ::testing::HasSubstr(fmt::format("Number of labels ({}) must match the number of data points ({})!", labels.size(), labels.size() - 1))); +} + TYPED_TEST(ClassificationDataSetMatrixConstructors, 
construct_scaled_from_matrix_without_label_no_padding) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -719,6 +894,28 @@ TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_without_label_no_padding_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, correct_data_points, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_without_label) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -752,6 +949,28 @@ TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_without_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + 
const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const auto data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, data_points, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_with_label_no_padding) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -790,6 +1009,29 @@ TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_with_label_no_padding_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points and labels + const std::vector different_labels = util::get_distinct_label(); + const std::vector labels = util::get_correct_data_file_labels(); + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, correct_data_points, labels, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + 
MPI_Comm_free(&duplicated_mpi_comm); +} +#endif + TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_with_label) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -827,6 +1069,30 @@ TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetMatrixConstructors, construct_scaled_from_matrix_with_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points and labels + const std::vector different_labels = util::get_distinct_label(); + const std::vector labels = util::get_correct_data_file_labels(); + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, correct_data_points, labels, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + //*************************************************************************************************************************************// // construct from r-value matrix // //*************************************************************************************************************************************// @@ -960,6 +1226,27 @@ TYPED_TEST(ClassificationDataSetRValueMatrixConstructors, construct_scaled_from_ } } +#if 
defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetRValueMatrixConstructors, construct_scaled_from_rvalue_matrix_without_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + auto data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, std::move(data_points), plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(ClassificationDataSetRValueMatrixConstructors, construct_scaled_from_rvalue_matrix_with_label) { using label_type = typename TestFixture::fixture_label_type; @@ -997,3 +1284,26 @@ TYPED_TEST(ClassificationDataSetRValueMatrixConstructors, construct_scaled_from_ EXPECT_FLOATING_POINT_NEAR(factors.upper, std::get<2>(scaling_factors[i])); } } + +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(ClassificationDataSetRValueMatrixConstructors, construct_scaled_from_rvalue_matrix_with_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points and labels + const std::vector different_labels = util::get_distinct_label(); + std::vector labels = util::get_correct_data_file_labels(); + auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }, plssvm::shape{ 
plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::classification_data_set{ plssvm::mpi::communicator{}, std::move(correct_data_points), std::move(labels), plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif diff --git a/tests/data_set/min_max_scaler.cpp b/tests/data_set/min_max_scaler.cpp index 7a0b320f3..611aca495 100644 --- a/tests/data_set/min_max_scaler.cpp +++ b/tests/data_set/min_max_scaler.cpp @@ -46,12 +46,12 @@ TEST(MinMaxScaler, construct_factor) { TEST(MinMaxScaler, construct_interval) { // create scaling class - const plssvm::min_max_scaler scale{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; + const plssvm::min_max_scaler scaler{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; // test whether the values have been correctly set - EXPECT_FLOATING_POINT_EQ(scale.scaling_interval().first, plssvm::real_type{ -1.0 }); - EXPECT_FLOATING_POINT_EQ(scale.scaling_interval().second, plssvm::real_type{ 1.0 }); - EXPECT_FALSE(scale.scaling_factors().has_value()); + EXPECT_FLOATING_POINT_EQ(scaler.scaling_interval().first, plssvm::real_type{ -1.0 }); + EXPECT_FLOATING_POINT_EQ(scaler.scaling_interval().second, plssvm::real_type{ 1.0 }); + EXPECT_FALSE(scaler.scaling_factors().has_value()); } TEST(MinMaxScaler, construct_invalid_interval) { @@ -65,34 +65,34 @@ TEST(MinMaxScaler, construct_from_file) { using factors_type = plssvm::min_max_scaler::factors; // create scaling class - const plssvm::min_max_scaler scale{ PLSSVM_TEST_PATH "/data/scaling_factors/scaling_factors.txt" }; + const plssvm::min_max_scaler scaler{ PLSSVM_TEST_PATH "/data/scaling_factors/scaling_factors.txt" }; // test whether the values have been correctly set - EXPECT_EQ(scale.scaling_interval().first, plssvm::detail::convert_to("-1.4")); - 
EXPECT_EQ(scale.scaling_interval().second, plssvm::detail::convert_to("2.6")); + EXPECT_EQ(scaler.scaling_interval().first, plssvm::detail::convert_to("-1.4")); + EXPECT_EQ(scaler.scaling_interval().second, plssvm::detail::convert_to("2.6")); const std::vector correct_factors = { factors_type{ 0, plssvm::real_type{ 0.0 }, plssvm::real_type{ 1.0 } }, factors_type{ 1, plssvm::real_type{ 1.1 }, plssvm::real_type{ 2.1 } }, factors_type{ 3, plssvm::real_type{ 3.3 }, plssvm::real_type{ 4.3 } }, factors_type{ 4, plssvm::real_type{ 4.4 }, plssvm::real_type{ 5.4 } }, }; - ASSERT_TRUE(scale.scaling_factors().has_value()); - ASSERT_EQ(scale.scaling_factors()->size(), correct_factors.size()); + ASSERT_TRUE(scaler.scaling_factors().has_value()); + ASSERT_EQ(scaler.scaling_factors()->size(), correct_factors.size()); for (std::size_t i = 0; i < correct_factors.size(); ++i) { - EXPECT_EQ(scale.scaling_factors().value()[i].feature, correct_factors[i].feature); - EXPECT_FLOATING_POINT_EQ(scale.scaling_factors().value()[i].lower, correct_factors[i].lower); - EXPECT_FLOATING_POINT_EQ(scale.scaling_factors().value()[i].upper, correct_factors[i].upper); + EXPECT_EQ(scaler.scaling_factors().value()[i].feature, correct_factors[i].feature); + EXPECT_FLOATING_POINT_EQ(scaler.scaling_factors().value()[i].lower, correct_factors[i].lower); + EXPECT_FLOATING_POINT_EQ(scaler.scaling_factors().value()[i].upper, correct_factors[i].upper); } } TEST(MinMaxScaler, save) { // create scaling class - const plssvm::min_max_scaler scale{ PLSSVM_TEST_PATH "/data/scaling_factors/scaling_factors.txt" }; + const plssvm::min_max_scaler scaler{ PLSSVM_TEST_PATH "/data/scaling_factors/scaling_factors.txt" }; // create temporary file const util::temporary_file tmp_file{}; // automatically removes the created file at the end of its scope // save scaling factors - scale.save(tmp_file.filename); + scaler.save(tmp_file.filename); // read file and check its content plssvm::detail::io::file_reader reader{ 
tmp_file.filename }; @@ -111,12 +111,12 @@ TEST(MinMaxScaler, save) { TEST(MinMaxScaler, save_empty_scaling_factors) { // create scaling class - const plssvm::min_max_scaler scale{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; + const plssvm::min_max_scaler scaler{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; // create temporary file const util::temporary_file tmp_file{}; // automatically removes the created file at the end of its scope // save scaling factors - scale.save(tmp_file.filename); + scaler.save(tmp_file.filename); // read file and check its content plssvm::detail::io::file_reader reader{ tmp_file.filename }; @@ -128,3 +128,80 @@ TEST(MinMaxScaler, save_empty_scaling_factors) { const std::regex reg{ "[-+]?[0-9]*.?[0-9]+([eE][-+]?[0-9]+)? [-+]?[0-9]*.?[0-9]+([eE][-+]?[0-9]+)?", std::regex::extended }; EXPECT_TRUE(std::regex_match(std::string{ reader.line(1) }, reg)); } + +TEST(MinMaxScaler, scale_scaling_factors_empty) { + // create scaling class + plssvm::min_max_scaler scaler{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; + + // create data and ground truth result + auto data = util::generate_specific_matrix>(plssvm::shape{ 10, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + const auto [data_scaled, scaling_factors] = util::scale(data, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 }); + + // scale the data (inplace) + scaler.scale(data); + + // check whether scaling was successful + EXPECT_FLOATING_POINT_MATRIX_NEAR(data, data_scaled); +} + +TEST(MinMaxScaler, scale_scaling_factors) { + // create temporary file + const util::temporary_file tmp_file{}; // automatically removes the created file at the end of its scope + + // create data and ground truth result + auto data = util::generate_specific_matrix>(plssvm::shape{ 10, 5 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + auto data_2 = data; + const auto [data_scaled, scaling_factors] = util::scale(data, plssvm::real_type{ -1.0 }, 
plssvm::real_type{ 1.0 }); + + { + // create scaling class + plssvm::min_max_scaler scaler{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; + // scale + scaler.scale(data); + // save scaling factors + scaler.save(tmp_file.filename); + } + + // create scaling class + plssvm::min_max_scaler scaler{ tmp_file.filename }; + + // scale the data (inplace) + scaler.scale(data_2); + + // check whether scaling was successful + EXPECT_FLOATING_POINT_MATRIX_NEAR(data, data_scaled); + EXPECT_FLOATING_POINT_MATRIX_NEAR(data_2, data_scaled); +} + +TEST(MinMaxScaler, scale_too_many_scaling_factors) { + // create scaling class + plssvm::min_max_scaler scaler{ PLSSVM_TEST_PATH "/data/scaling_factors/scaling_factors.txt" }; + + // create data and ground truth result + auto data = util::generate_specific_matrix>(plssvm::shape{ 10, 3 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + + // invalid number of scaling factors + EXPECT_THROW_WHAT_MATCHER(scaler.scale(data), plssvm::min_max_scaler_exception, ::testing::HasSubstr("Need at most as much scaling factors as features in the data set are present (3), but 4 were given!")); +} + +TEST(MinMaxScaler, scale_feature_index_too_big) { + // create scaling class + plssvm::min_max_scaler scaler{ PLSSVM_TEST_PATH "/data/scaling_factors/scaling_factors.txt" }; + + // create data and ground truth result + auto data = util::generate_specific_matrix>(plssvm::shape{ 10, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + + // invalid number of scaling factors + EXPECT_THROW_WHAT(scaler.scale(data), plssvm::min_max_scaler_exception, "The maximum scaling feature index most not be greater or equal than 4, but is 4!"); +} + +TEST(MinMaxScaler, scale_scaling_factor_more_than_once) { + // create scaling class + plssvm::min_max_scaler scaler{ PLSSVM_TEST_PATH "/data/scaling_factors/invalid/feature_index_more_than_once.txt" }; + + // create data and ground truth result + auto data = 
util::generate_specific_matrix>(plssvm::shape{ 10, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + + // invalid number of scaling factors + EXPECT_THROW_WHAT(scaler.scale(data), plssvm::min_max_scaler_exception, "Found more than one scaling factor for the feature index 0!"); +} diff --git a/tests/data_set/regression/constructors.cpp b/tests/data_set/regression/constructors.cpp index 82a239c65..d6001c93c 100644 --- a/tests/data_set/regression/constructors.cpp +++ b/tests/data_set/regression/constructors.cpp @@ -11,9 +11,10 @@ #include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE #include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler #include "plssvm/data_set/regression_data_set.hpp" // data set class to test -#include "plssvm/exceptions/exceptions.hpp" // plssvm::data_set_exception +#include "plssvm/exceptions/exceptions.hpp" // plssvm::data_set_exception, plssvm::mpi_exception #include "plssvm/file_format_types.hpp" // plssvm::file_format_type #include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/svm_types.hpp" // plssvm::svm_type @@ -22,6 +23,10 @@ #include "tests/types_to_test.hpp" // util::{regression_label_type_gtest, regression_label_type_layout_type_gtest, test_parameter_type_at_t, test_parameter_value_at_v} #include "tests/utility.hpp" // util::{redirect_output, temporary_file, instantiate_template_file, get_distinct_label, get_correct_data_file_labels, generate_specific_matrix, scale} +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_COMM_WORLD, MPI_Comm_dup, MPI_Comm_free +#endif + #include "gtest/gtest.h" // TYPED_TEST, TYPED_TEST_SUITE, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE; ASSERT_TRUE, FAIL, ::testing::{Test, StaticAssertTypeEq} #include // std::size_t @@ -229,6 +234,26 @@ TYPED_TEST(RegressionDataSetConstructors, 
construct_scaled_arff_from_file) { } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetConstructors, construct_scaled_arff_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, PLSSVM_TEST_PATH "/data/arff/regression/6x4.arff", { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetConstructors, construct_scaled_libsvm_from_file) { using label_type = typename TestFixture::fixture_label_type; @@ -259,6 +284,26 @@ TYPED_TEST(RegressionDataSetConstructors, construct_scaled_libsvm_from_file) { } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetConstructors, construct_scaled_libsvm_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, PLSSVM_TEST_PATH "/data/libsvm/regression/6x4.libsvm", { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetConstructors, construct_scaled_explicit_arff_from_file) { using label_type = typename TestFixture::fixture_label_type; @@ -289,6 +334,26 @@ 
TYPED_TEST(RegressionDataSetConstructors, construct_scaled_explicit_arff_from_fi } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetConstructors, construct_scaled_explicit_arff_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, PLSSVM_TEST_PATH "/data/arff/regression/6x4.arff", plssvm::file_format_type::arff, { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetConstructors, construct_scaled_explicit_libsvm_from_file) { using label_type = typename TestFixture::fixture_label_type; @@ -319,6 +384,26 @@ TYPED_TEST(RegressionDataSetConstructors, construct_scaled_explicit_libsvm_from_ } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetConstructors, construct_scaled_explicit_libsvm_from_file_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, PLSSVM_TEST_PATH "/data/libsvm/regression/6x4.libsvm", plssvm::file_format_type::libsvm, { comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + 
//*************************************************************************************************************************************// // construct from 2D vector // //*************************************************************************************************************************************// @@ -403,6 +488,17 @@ TYPED_TEST(RegressionDataSetConstructors, construct_from_vector_with_label) { EXPECT_FALSE(data.scaling_factors().has_value()); } +TYPED_TEST(RegressionDataSetConstructors, construct_from_empty_vector_and_labels) { + using label_type = typename TestFixture::fixture_label_type; + + const std::vector labels = util::get_correct_data_file_labels(); + + // creating a data set from an empty vector is illegal + EXPECT_THROW_WHAT((plssvm::regression_data_set{ std::vector>{}, labels }), + plssvm::data_set_exception, + "Data vector is empty!"); +} + TYPED_TEST(RegressionDataSetConstructors, construct_from_vector_mismatching_num_data_points_and_labels) { using label_type = typename TestFixture::fixture_label_type; @@ -447,6 +543,27 @@ TYPED_TEST(RegressionDataSetConstructors, construct_scaled_from_vector_without_l } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetConstructors, construct_scaled_from_vector_without_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const auto data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, data_points.to_2D_vector(), plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be 
identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetConstructors, construct_scaled_from_vector_with_label) { using label_type = typename TestFixture::fixture_label_type; @@ -479,6 +596,28 @@ TYPED_TEST(RegressionDataSetConstructors, construct_scaled_from_vector_with_labe } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetConstructors, construct_scaled_from_vector_with_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const std::vector labels = util::get_correct_data_file_labels(); + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, correct_data_points.to_2D_vector(), labels, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + //*************************************************************************************************************************************// // construct from matrix // //*************************************************************************************************************************************// @@ -610,6 +749,34 @@ TYPED_TEST(RegressionDataSetMatrixConstructors, construct_from_matrix_with_label EXPECT_FALSE(data.scaling_factors().has_value()); } +TYPED_TEST(RegressionDataSetMatrixConstructors, construct_from_empty_matrix_with_label) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = 
TestFixture::fixture_layout; + + // create data points and labels + const std::vector labels = util::get_correct_data_file_labels(); + const plssvm::matrix data_points{ plssvm::shape{ 0, 0 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + + // creating a data set from an empty vector is illegal + EXPECT_THROW_WHAT((plssvm::regression_data_set{ data_points, labels }), + plssvm::data_set_exception, + "Data vector is empty!"); +} + +TYPED_TEST(RegressionDataSetMatrixConstructors, construct_from_matrix_with_label_size_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create data points and labels + const std::vector labels = util::get_correct_data_file_labels(); + const plssvm::matrix data_points{ plssvm::shape{ labels.size() - 1, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + + // creating a data set from an empty vector is illegal + EXPECT_THROW_WHAT_MATCHER((plssvm::regression_data_set{ data_points, labels }), + plssvm::data_set_exception, + ::testing::HasSubstr(fmt::format("Number of labels ({}) must match the number of data points ({})!", labels.size(), labels.size() - 1))); +} + TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_without_label_no_padding) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -642,6 +809,28 @@ TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_wit } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_without_label_no_padding_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); 
+ const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, correct_data_points, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_without_label) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -673,6 +862,28 @@ TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_wit } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_without_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + const auto data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, data_points, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_with_label_no_padding) { using label_type = typename 
TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -707,6 +918,28 @@ TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_wit } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_with_label_no_padding_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points and labels + const std::vector labels = util::get_correct_data_file_labels(); + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, correct_data_points, labels, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} +#endif + TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_with_label) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::layout_type layout = TestFixture::fixture_layout; @@ -740,6 +973,30 @@ TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_wit } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetMatrixConstructors, construct_scaled_from_matrix_with_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::layout_type layout = TestFixture::fixture_layout; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const 
plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points and labels + const std::vector different_labels = util::get_distinct_label(); + const std::vector labels = util::get_correct_data_file_labels(); + const auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, correct_data_points, labels, plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + //*************************************************************************************************************************************// // construct from r-value matrix // //*************************************************************************************************************************************// @@ -865,6 +1122,27 @@ TYPED_TEST(RegressionDataSetRValueMatrixConstructors, construct_scaled_from_rval } } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetRValueMatrixConstructors, construct_scaled_from_rvalue_matrix_without_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points + auto data_points = util::generate_specific_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, std::move(data_points), plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to 
the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(RegressionDataSetRValueMatrixConstructors, construct_scaled_from_rvalue_matrix_with_label) { using label_type = typename TestFixture::fixture_label_type; @@ -898,3 +1176,26 @@ TYPED_TEST(RegressionDataSetRValueMatrixConstructors, construct_scaled_from_rval EXPECT_FLOATING_POINT_NEAR(factors.upper, std::get<2>(scaling_factors[i])); } } + +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(RegressionDataSetRValueMatrixConstructors, construct_scaled_from_rvalue_matrix_with_label_comm_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data points and labels + const std::vector different_labels = util::get_distinct_label(); + std::vector labels = util::get_correct_data_file_labels(); + auto correct_data_points = util::generate_specific_matrix>(plssvm::shape{ labels.size(), 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE }); + EXPECT_THROW_WHAT((plssvm::regression_data_set{ plssvm::mpi::communicator{}, std::move(correct_data_points), std::move(labels), plssvm::min_max_scaler{ comm, plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } } }), + plssvm::mpi_exception, + "The MPI communicators provided to the data set and scaler must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif diff --git a/tests/detail/assert.cpp b/tests/detail/assert.cpp index 278a772b2..4ae3154ab 100644 --- a/tests/detail/assert.cpp +++ b/tests/detail/assert.cpp @@ -13,6 +13,8 @@ #include "gmock/gmock.h" // ::testing::ContainsRegex #include "gtest/gtest.h" // TEST, ASSERT_DEATH, EXPECT_DEATH +#include // std::string + // only test if assertions are enabled #if defined(PLSSVM_ENABLE_ASSERTS) @@ -25,6 +27,22 @@ TEST(PLSSVMAssert, 
assert_false) { ASSERT_DEATH(PLSSVM_ASSERT(false, "FALSE"), ::testing::ContainsRegex("Assertion '.*false.*' failed!")); } +TEST(PLSSVMAssertDeathTest, check_assertion_false) { + const auto loc = plssvm::source_location::current(); + + // test regex + const std::string regex = fmt::format("Assertion '.*1 == 2.*' failed!\n" + "{}" + " in file .*\n" + " in function .*\n" + " @ line .*\n\n" + ".*msg 1.*\n", + loc.world_rank().has_value() ? " on MPI world rank .*\n" : ""); + + // calling check assertion with false should abort + EXPECT_DEATH(plssvm::detail::check_assertion(1 == 2, "1 == 2", loc, "msg {}", 1), ::testing::ContainsRegex(regex)); +} + #endif // check the internal check_assertion function @@ -32,8 +50,3 @@ TEST(PLSSVMAssert, check_assertion_true) { // calling check assertion with true shouldn't do anything plssvm::detail::check_assertion(true, "", plssvm::source_location::current(), ""); } - -TEST(PLSSVMAssert, check_assertion_false) { - // calling check assertion with false should abort - EXPECT_DEATH(plssvm::detail::check_assertion(false, "cond", plssvm::source_location::current(), "msg {}", 1), "cond"); -} diff --git a/tests/detail/cmd/cmd_utility.hpp b/tests/detail/cmd/cmd_utility.hpp index 717d888d8..60dc120e8 100644 --- a/tests/detail/cmd/cmd_utility.hpp +++ b/tests/detail/cmd/cmd_utility.hpp @@ -13,6 +13,7 @@ #define PLSSVM_TESTS_DETAIL_CMD_UTILITY_HPP_ #pragma once +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity #include "tests/utility.hpp" // util::redirect_output @@ -57,6 +58,12 @@ class ParameterBase : public ::testing::Test, plssvm::verbosity = verbosity_save_; } + /** + * @brief Return the used MPI communicator. 
+ * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const plssvm::mpi::communicator get_comm() const noexcept { return comm_; } + /** * @brief Return the number of command line arguments encapsulated in this class. * @return the number of cmd arguments (`[[nodiscard]]`) @@ -74,6 +81,8 @@ class ParameterBase : public ::testing::Test, mutable std::vector cmd_options_{}; /// The command line options cast to a char *. mutable std::vector cmd_argv_{}; + /// The MPI communicator (unused during testing since we do not support MPI runtime tests). + plssvm::mpi::communicator comm_{}; /// The verbosity level at the time of the test start. plssvm::verbosity_level verbosity_save_{}; }; diff --git a/tests/detail/cmd/data_set_variants.cpp b/tests/detail/cmd/data_set_variants.cpp index cac3ec5c8..68529fac2 100644 --- a/tests/detail/cmd/data_set_variants.cpp +++ b/tests/detail/cmd/data_set_variants.cpp @@ -14,6 +14,7 @@ #include "plssvm/detail/cmd/parser_predict.hpp" // plssvm::detail::cmd::parser_predict #include "plssvm/detail/cmd/parser_scale.hpp" // plssvm::detail::cmd::parser_scale #include "plssvm/detail/cmd/parser_train.hpp" // plssvm::detail::cmd::parser_train +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/svm_types.hpp" // plssvm::svm_type #include "tests/detail/cmd/cmd_utility.hpp" // util::ParameterBase @@ -25,7 +26,7 @@ #include // std::size_t #include // std::string -#include // std::tuple, std::make_tuple +#include // std::tuple, std::make_tuple, std::ignore #include // std::vector // the variant order is: classification -> classification -> regression @@ -62,10 +63,10 @@ TEST_P(DataSetFactory, data_set_factory_predict) { // create artificial command line arguments in test fixture this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), 
this->get_argc(), this->get_argv() }; // test active variant type - const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(parser); + const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(this->get_comm(), parser); EXPECT_EQ(var.index(), result_index); } @@ -89,10 +90,10 @@ TEST_P(DataSetFactory, data_set_factory_scale) { // create artificial command line arguments in test fixture this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test active variant type - const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(parser); + const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(this->get_comm(), parser); if (svm == plssvm::svm_type::csvr) { // the svm_type doesn't matter for plssvm-scale EXPECT_EQ(var.index(), 0); // use corresponding classification data set index @@ -121,10 +122,10 @@ TEST_P(DataSetFactory, data_set_factory_scale_restore_filename) { // create artificial command line arguments in test fixture this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test active variant type - const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(parser); + const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(this->get_comm(), parser); if (svm == plssvm::svm_type::csvr) { // the svm_type doesn't matter for plssvm-scale EXPECT_EQ(var.index(), 0); // use corresponding classification data set index @@ -153,10 +154,10 @@ TEST_P(DataSetFactory, data_set_factory_train) { // 
create artificial command line arguments in test fixture this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test active variant type - const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(parser); + const plssvm::detail::cmd::data_set_variants var = plssvm::detail::cmd::data_set_factory(this->get_comm(), parser); EXPECT_EQ(var.index(), result_index); } diff --git a/tests/detail/cmd/parser_predict.cpp b/tests/detail/cmd/parser_predict.cpp index a9a00d6da..f8ee46ed3 100644 --- a/tests/detail/cmd/parser_predict.cpp +++ b/tests/detail/cmd/parser_predict.cpp @@ -14,10 +14,11 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::cmd_parser_exit #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity -#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING +#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_THROW_WHAT #include "tests/detail/cmd/cmd_utility.hpp" // util::ParameterBase #include "tests/naming.hpp" // naming::{pretty_print_parameter_flag_and_value, pretty_print_parameter_flag} #include "tests/utility.hpp" // util::{convert_from_string, redirect_output} @@ -41,7 +42,7 @@ TEST_F(ParserPredict, minimal) { this->CreateCMDArgs({ "./plssvm-predict", "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), 
this->get_argc(), this->get_argv() }; // check parsed values EXPECT_EQ(parser.backend, plssvm::backend_type::automatic); @@ -61,7 +62,7 @@ TEST_F(ParserPredict, minimal_output) { this->CreateCMDArgs({ "./plssvm-predict", "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test output string const std::string correct = fmt::format( @@ -93,12 +94,15 @@ TEST_F(ParserPredict, all_arguments) { #endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + cmd_args.insert(cmd_args.end(), { "--mpi_load_balancing_weights", "2" }); #endif cmd_args.insert(cmd_args.end(), { "data.libsvm", "data.libsvm.model", "data.libsvm.predict" }); this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // check parsed values EXPECT_EQ(parser.backend, plssvm::backend_type::cuda); @@ -122,6 +126,9 @@ TEST_F(ParserPredict, all_arguments) { #else EXPECT_EQ(parser.performance_tracking_filename, ""); #endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + EXPECT_EQ(parser.mpi_load_balancing_weights, std::vector{ 2 }); +#endif EXPECT_EQ(plssvm::verbosity, plssvm::verbosity_level::libsvm); } @@ -138,12 +145,15 @@ TEST_F(ParserPredict, all_arguments_output) { #endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + cmd_args.insert(cmd_args.end(), { "--mpi_load_balancing_weights", "2" }); #endif cmd_args.insert(cmd_args.end(), { "data1.libsvm", 
"data2.libsvm.model", "data3.libsvm.predict" }); this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test output string std::string correct{ @@ -170,6 +180,9 @@ TEST_F(ParserPredict, all_arguments_output) { #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) correct += "performance tracking file: 'tracking.yaml'\n"; #endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + correct += "mpi load-balancing weights: [2]\n"; +#endif EXPECT_CONVERSION_TO_STRING(parser, correct); @@ -187,7 +200,7 @@ TEST_P(ParserPredictBackend, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.backend, backend); } @@ -209,7 +222,7 @@ TEST_P(ParserPredictTargetPlatform, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.target, target_platform); } @@ -233,7 +246,7 @@ TEST_P(ParserPredictSYCLImplementation, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ 
this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.sycl_implementation_type, sycl_implementation_type); } @@ -259,13 +272,13 @@ TEST_P(ParserPredictKokkosExecutionSpace, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.kokkos_execution_space, kokkos_execution_space); } // clang-format off -INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserPredictKokkosExecutionSpace, ::testing::Combine( +INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictKokkosExecutionSpace, ::testing::Combine( ::testing::Values("--kokkos_execution_space"), ::testing::Values("automatic", "Cuda", "HIP", "SYCL", "HPX", "OpenMP", "OpenMPTarget", "OpenACC", "Threads", "Serial")), naming::pretty_print_parameter_flag_and_value); @@ -283,7 +296,7 @@ TEST_P(ParserPredictPerformanceTrackingFilename, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.performance_tracking_filename, value); } @@ -297,6 +310,50 @@ INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictPerformanceTrackingFilename #endif // PLSSVM_PERFORMANCE_TRACKER_ENABLED +#if defined(PLSSVM_HAS_MPI_ENABLED) + +class ParserPredictMPILoadBalancingWeights : public 
ParserPredict, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserPredictMPILoadBalancingWeights, parsing) { + const auto &[flag, value] = GetParam(); + // convert string to std::vector + const std::vector weights{ util::convert_from_string(value) }; + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); + // create parameter object + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; + // test for correctness + EXPECT_EQ(parser.mpi_load_balancing_weights, weights); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictMPILoadBalancingWeights, ::testing::Combine( + ::testing::Values("--mpi_load_balancing_weights"), + ::testing::Values("1", "2")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + +class ParserPredictMPILoadBalancingWeightsInvalid : public ParserPredict, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserPredictMPILoadBalancingWeightsInvalid, parsing) { + const auto &[flag, value] = GetParam(); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); + // create parameter object + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictMPILoadBalancingWeightsInvalid, ::testing::Combine( + ::testing::Values("--mpi_load_balancing_weights"), + ::testing::Values("1,2", "1,2,3")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + +#endif // PLSSVM_HAS_MPI_ENABLED + class ParserPredictUseStringsAsLabels : public ParserPredict, public ::testing::WithParamInterface> { }; @@ -305,7 +362,7 @@ 
TEST_P(ParserPredictUseStringsAsLabels, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", fmt::format("{}={}", flag, value), "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.strings_as_labels, value); } @@ -325,13 +382,13 @@ TEST_P(ParserPredictVerbosity, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(fmt::format("{}", plssvm::verbosity), value); } // clang-format off -INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserPredictVerbosity, ::testing::Combine( +INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictVerbosity, ::testing::Combine( ::testing::Values("--verbosity"), ::testing::Values("quiet", "libsvm", "timing", "full")), naming::pretty_print_parameter_flag_and_value); @@ -346,7 +403,7 @@ TEST_P(ParserPredictQuiet, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag, "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(plssvm::verbosity, flag.empty() ? 
old_verbosity : plssvm::verbosity_level::quiet); } @@ -360,72 +417,66 @@ TEST_F(ParserPredictVerbosityAndQuiet, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", "--quiet", "--verbosity", "full", "data.libsvm", "data.libsvm.model" }); // create parameter object - const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_predict parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // the quiet flag overrides the verbosity flag EXPECT_EQ(plssvm::verbosity, plssvm::verbosity_level::quiet); } -class ParserPredictHelpDeathTest : public ParserPredict, - public ::testing::WithParamInterface { }; +class ParserPredictHelp : public ParserPredict, + public ::testing::WithParamInterface { }; -TEST_P(ParserPredictHelpDeathTest, parsing) { +TEST_P(ParserPredictHelp, parsing) { const std::string &flag = GetParam(); // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag }); // create parameter object - EXPECT_EXIT((plssvm::detail::cmd::parser_predict{ this->get_argc(), this->get_argv() }), ::testing::ExitedWithCode(EXIT_SUCCESS), ""); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_SUCCESS)); } -INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictHelpDeathTest, ::testing::Values("-h", "--help"), naming::pretty_print_parameter_flag); +INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictHelp, ::testing::Values("-h", "--help"), naming::pretty_print_parameter_flag); -class ParserPredictVersionDeathTest : public ParserPredict, - public ::testing::WithParamInterface { }; +class ParserPredictVersion : public ParserPredict, + public ::testing::WithParamInterface { }; -TEST_P(ParserPredictVersionDeathTest, parsing) { +TEST_P(ParserPredictVersion, parsing) { const std::string 
&flag = GetParam(); // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-predict", flag }); // create parameter object - EXPECT_EXIT((plssvm::detail::cmd::parser_predict{ this->get_argc(), this->get_argv() }), ::testing::ExitedWithCode(EXIT_SUCCESS), ""); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_SUCCESS)); } -INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictVersionDeathTest, ::testing::Values("-v", "--version"), naming::pretty_print_parameter_flag); - -class ParserPredictDeathTest : public ParserPredict { }; +INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictVersion, ::testing::Values("-v", "--version"), naming::pretty_print_parameter_flag); -TEST_F(ParserPredictDeathTest, no_positional_argument) { +TEST_F(ParserPredict, no_positional_argument) { this->CreateCMDArgs({ "./plssvm-predict" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_predict{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr("ERROR: missing test file!")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } -TEST_F(ParserPredictDeathTest, single_positional_argument) { +TEST_F(ParserPredict, single_positional_argument) { this->CreateCMDArgs({ "./plssvm-predict", "data.libsvm" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_predict{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr("ERROR: missing model file!")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } -TEST_F(ParserPredictDeathTest, too_many_positional_arguments) { +TEST_F(ParserPredict, 
too_many_positional_arguments) { this->CreateCMDArgs({ "./plssvm-predict", "p1", "p2", "p3", "p4" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_predict{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr(R"(ERROR: only up to three positional options may be given, but 1 ("p4") additional option(s) where provided!)")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } +TEST_F(ParserPredict, unrecognized_option) { + this->CreateCMDArgs({ "./plssvm-predict", "--foo", "bar" }); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_predict{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +class ParserPredictDeathTest : public ParserPredict { }; + // test whether nonsensical cmd arguments trigger the assertions TEST_F(ParserPredictDeathTest, too_few_argc) { - EXPECT_DEATH((plssvm::detail::cmd::parser_predict{ 0, nullptr }), + EXPECT_DEATH((plssvm::detail::cmd::parser_predict{ this->get_comm(), 0, nullptr }), ::testing::HasSubstr("At least one argument is always given (the executable name), but argc is 0!")); } TEST_F(ParserPredictDeathTest, nullptr_argv) { - EXPECT_DEATH((plssvm::detail::cmd::parser_predict{ 1, nullptr }), + EXPECT_DEATH((plssvm::detail::cmd::parser_predict{ this->get_comm(), 1, nullptr }), ::testing::HasSubstr("At least one argument is always given (the executable name), but argv is a nullptr!")); } - -TEST_F(ParserPredictDeathTest, unrecognized_option) { - this->CreateCMDArgs({ "./plssvm-predict", "--foo", "bar" }); - EXPECT_DEATH((plssvm::detail::cmd::parser_predict{ this->get_argc(), this->get_argv() }), ""); -} diff --git a/tests/detail/cmd/parser_scale.cpp b/tests/detail/cmd/parser_scale.cpp index adb2f413e..f38bc3d0b 100644 --- a/tests/detail/cmd/parser_scale.cpp +++ 
b/tests/detail/cmd/parser_scale.cpp @@ -10,11 +10,13 @@ #include "plssvm/detail/cmd/parser_scale.hpp" -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/file_format_types.hpp" // plssvm::file_format_type -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::cmd_parser_exit +#include "plssvm/file_format_types.hpp" // plssvm::file_format_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity -#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING +#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_THROW_WHAT #include "tests/detail/cmd/cmd_utility.hpp" // util::ParameterBase #include "tests/naming.hpp" // naming::{pretty_print_parameter_flag_and_value, pretty_print_parameter_flag} #include "tests/utility.hpp" // util::{convert_from_string, redirect_output} @@ -38,7 +40,7 @@ TEST_F(ParserScale, minimal) { this->CreateCMDArgs({ "./plssvm-scale", "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // check default values EXPECT_FLOATING_POINT_EQ(parser.lower, plssvm::real_type{ -1.0 }); @@ -59,7 +61,7 @@ TEST_F(ParserScale, minimal_output) { this->CreateCMDArgs({ "./plssvm-scale", "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test output string const std::string correct = fmt::format( @@ -89,7 +91,7 @@ TEST_F(ParserScale, all_arguments) { this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_scale 
parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // check default values EXPECT_FLOATING_POINT_EQ(parser.lower, plssvm::real_type{ -2.0 }); @@ -117,7 +119,7 @@ TEST_F(ParserScale, all_arguments_output) { this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test output string std::string correct = fmt::format( @@ -148,7 +150,7 @@ TEST_P(ParserScaleLower, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, fmt::format("{}", value), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_FLOATING_POINT_EQ(parser.lower, value); } @@ -168,7 +170,7 @@ TEST_P(ParserScaleUpper, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, fmt::format("{}", value), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_FLOATING_POINT_EQ(parser.upper, value); } @@ -190,7 +192,7 @@ TEST_P(ParserScaleFileFormat, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), 
this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.format, backend); } @@ -210,7 +212,7 @@ TEST_P(ParserScaleSaveFilename, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.save_filename, value); } @@ -230,7 +232,7 @@ TEST_P(ParserScaleRestoreFilename, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.restore_filename, value); } @@ -242,6 +244,34 @@ INSTANTIATE_TEST_SUITE_P(ParserScale, ParserScaleRestoreFilename, ::testing::Com naming::pretty_print_parameter_flag_and_value); // clang-format on +class ParserScaleRestoreFilenameLowerUpper : public ParserScale, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserScaleRestoreFilenameLowerUpper, parsing) { + util::redirect_output<&std::clog> clog_capture{}; + + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + const auto &[flag, value] = GetParam(); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-scale", flag, value, "-l", "-1.0", "data.libsvm" }); + // create parameter object + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; + // test for correctness + EXPECT_EQ(parser.restore_filename, value); + + // check captured output for warning message + 
EXPECT_THAT(clog_capture.get_capture(), ::testing::HasSubstr("WARNING: provided -l (--lower) and/or -u (--upper) together with -r (--restore_filename); ignoring -l/-u")); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserScale, ParserScaleRestoreFilenameLowerUpper, ::testing::Combine( + ::testing::Values("-r", "--restore_filename"), + ::testing::Values("data.libsvm.weights", "output.txt")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) class ParserScalePerformanceTrackingFilename : public ParserScale, @@ -252,7 +282,7 @@ TEST_P(ParserScalePerformanceTrackingFilename, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.performance_tracking_filename, value); } @@ -274,7 +304,7 @@ TEST_P(ParserScaleUseStringsAsLabels, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", fmt::format("{}={}", flag, value), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.strings_as_labels, value); } @@ -294,7 +324,7 @@ TEST_P(ParserScaleVerbosity, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), 
this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(fmt::format("{}", plssvm::verbosity), value); } @@ -315,7 +345,7 @@ TEST_P(ParserScaleQuiet, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(plssvm::verbosity, flag.empty() ? old_verbosity : plssvm::verbosity_level::quiet); } @@ -329,80 +359,72 @@ TEST_F(ParserScaleVerbosityAndQuiet, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", "--quiet", "--verbosity", "full", "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_scale parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_scale parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // the quiet flag overrides the verbosity flag EXPECT_EQ(plssvm::verbosity, plssvm::verbosity_level::quiet); } -class ParserScaleHelpDeathTest : public ParserScale, - public ::testing::WithParamInterface { }; +class ParserScaleHelp : public ParserScale, + public ::testing::WithParamInterface { }; -TEST_P(ParserScaleHelpDeathTest, parsing) { +TEST_P(ParserScaleHelp, parsing) { const std::string &flag = GetParam(); // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag }); // create parameter object - EXPECT_EXIT((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), ::testing::ExitedWithCode(EXIT_SUCCESS), ""); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_SUCCESS)); } -INSTANTIATE_TEST_SUITE_P(ParserScale, 
ParserScaleHelpDeathTest, ::testing::Values("-h", "--help"), naming::pretty_print_parameter_flag); +INSTANTIATE_TEST_SUITE_P(ParserScale, ParserScaleHelp, ::testing::Values("-h", "--help"), naming::pretty_print_parameter_flag); -class ParserScaleVersionDeathTest : public ParserScale, - public ::testing::WithParamInterface { }; +class ParserScaleVersion : public ParserScale, + public ::testing::WithParamInterface { }; -TEST_P(ParserScaleVersionDeathTest, parsing) { +TEST_P(ParserScaleVersion, parsing) { const std::string &flag = GetParam(); // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-scale", flag }); // create parameter object - EXPECT_EXIT((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), ::testing::ExitedWithCode(EXIT_SUCCESS), ""); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_SUCCESS)); } -INSTANTIATE_TEST_SUITE_P(ParserScale, ParserScaleVersionDeathTest, ::testing::Values("-v", "--version"), naming::pretty_print_parameter_flag); +INSTANTIATE_TEST_SUITE_P(ParserScale, ParserScaleVersion, ::testing::Values("-v", "--version"), naming::pretty_print_parameter_flag); -class ParserScaleDeathTest : public ParserScale { }; - -TEST_F(ParserScaleDeathTest, no_positional_argument) { +TEST_F(ParserScale, no_positional_argument) { this->CreateCMDArgs({ "./plssvm-scale" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr("ERROR: missing input file!")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } -TEST_F(ParserScaleDeathTest, save_and_restore) { +TEST_F(ParserScale, save_and_restore) { this->CreateCMDArgs({ "./plssvm-scale", "-s", 
"data.libsvm.save", "-r", "data.libsvm.restore", "data.libsvm" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr("ERROR: cannot use -s (--save_filename) and -r (--restore_filename) simultaneously!")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } -TEST_F(ParserScaleDeathTest, too_many_positional_arguments) { +TEST_F(ParserScale, too_many_positional_arguments) { this->CreateCMDArgs({ "./plssvm-scale", "p1", "p2", "p3", "p4" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr(R"(ERROR: only up to two positional options may be given, but 2 ("p3 p4") additional option(s) where provided!)")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } -TEST_F(ParserScaleDeathTest, illegal_scaling_range) { +TEST_F(ParserScale, illegal_scaling_range) { // illegal [lower, upper] bound range this->CreateCMDArgs({ "./plssvm-scale", "-l", "1.0", "-u", "-1.0", "data.libsvm" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr("ERROR: invalid scaling range [lower, upper] with [1, -1]!")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } +TEST_F(ParserScale, unrecognized_option) { + this->CreateCMDArgs({ "./plssvm-scale", "--foo", "bar" }); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_scale{ this->get_comm(), this->get_argc(), this->get_argv() }), 
plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +class ParserScaleDeathTest : public ParserScale { }; + // test whether nonsensical cmd arguments trigger the assertions TEST_F(ParserScaleDeathTest, too_few_argc) { - EXPECT_DEATH((plssvm::detail::cmd::parser_scale{ 0, nullptr }), + EXPECT_DEATH((plssvm::detail::cmd::parser_scale{ this->get_comm(), 0, nullptr }), ::testing::HasSubstr("At least one argument is always given (the executable name), but argc is 0!")); } TEST_F(ParserScaleDeathTest, nullptr_argv) { - EXPECT_DEATH((plssvm::detail::cmd::parser_scale{ 1, nullptr }), + EXPECT_DEATH((plssvm::detail::cmd::parser_scale{ this->get_comm(), 1, nullptr }), ::testing::HasSubstr("At least one argument is always given (the executable name), but argv is a nullptr!")); } - -TEST_F(ParserScaleDeathTest, unrecognized_option) { - this->CreateCMDArgs({ "./plssvm-scale", "--foo", "bar" }); - EXPECT_DEATH((plssvm::detail::cmd::parser_scale{ this->get_argc(), this->get_argv() }), ""); -} diff --git a/tests/detail/cmd/parser_train.cpp b/tests/detail/cmd/parser_train.cpp index 73c842955..94337f90b 100644 --- a/tests/detail/cmd/parser_train.cpp +++ b/tests/detail/cmd/parser_train.cpp @@ -16,6 +16,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type #include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::cmd_parser_exit #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/solver_types.hpp" // plssvm::solver_type @@ -23,7 +24,7 @@ #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity -#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING +#include "tests/custom_test_macros.hpp" // 
EXPECT_CONVERSION_TO_STRING, EXPECT_THROW_WHAT #include "tests/detail/cmd/cmd_utility.hpp" // util::ParameterBase #include "tests/naming.hpp" // naming::{pretty_print_parameter_flag_and_value, pretty_print_parameter_flag} #include "tests/utility.hpp" // util::{convert_from_string, redirect_output} @@ -44,14 +45,12 @@ class ParserTrain : public util::ParameterBase { }; -class ParserTrainDeathTest : public ParserTrain { }; - TEST_F(ParserTrain, minimal) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // check parsed values EXPECT_EQ(parser.csvm_params, plssvm::parameter{}); @@ -77,7 +76,7 @@ TEST_F(ParserTrain, minimal_output) { this->CreateCMDArgs({ "./plssvm-train", "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test output string const std::string correct = fmt::format( @@ -116,12 +115,15 @@ TEST_F(ParserTrain, all_arguments) { #endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + cmd_args.insert(cmd_args.end(), { "--mpi_load_balancing_weights", "2" }); #endif cmd_args.insert(cmd_args.end(), { "data.libsvm", "data.libsvm.model" }); this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // check parsed values EXPECT_EQ(parser.csvm_params.kernel_type, 
plssvm::kernel_function_type::polynomial); @@ -156,6 +158,9 @@ TEST_F(ParserTrain, all_arguments) { #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) EXPECT_EQ(parser.performance_tracking_filename, "tracking.yaml"); #endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + EXPECT_EQ(parser.mpi_load_balancing_weights, std::vector{ 2 }); +#endif EXPECT_EQ(plssvm::verbosity, plssvm::verbosity_level::libsvm); } @@ -172,12 +177,15 @@ TEST_F(ParserTrain, all_arguments_output) { #endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + cmd_args.insert(cmd_args.end(), { "--mpi_load_balancing_weights", "2" }); #endif cmd_args.insert(cmd_args.end(), { "data.libsvm", "data.libsvm.model" }); this->CreateCMDArgs(cmd_args); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test output string std::string correct = @@ -213,6 +221,9 @@ TEST_F(ParserTrain, all_arguments_output) { std::is_same_v ? 
"float" : "double"); #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) correct += "performance tracking file: 'tracking.yaml'\n"; +#endif +#if defined(PLSSVM_HAS_MPI_ENABLED) + correct += "mpi load-balancing weights: [2]\n"; #endif EXPECT_CONVERSION_TO_STRING(parser, correct); @@ -230,7 +241,7 @@ TEST_P(ParserTrainSvm, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.svm, svm_type); } @@ -252,7 +263,7 @@ TEST_P(ParserTrainKernel, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.csvm_params.kernel_type, kernel_type); } @@ -273,7 +284,7 @@ TEST_P(ParserTrainDegree, parsing) { this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", degree), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.csvm_params.degree, degree); } @@ -293,7 +304,7 @@ TEST_P(ParserTrainGamma, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", gamma), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + 
const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness if (std::holds_alternative(gamma)) { ASSERT_TRUE(std::holds_alternative(parser.csvm_params.gamma)); @@ -313,22 +324,22 @@ INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainGamma, naming::pretty_print_parameter_flag_and_value); // clang-format on -class ParserTrainGammaDeathTest : public ParserTrain, - public ::testing::WithParamInterface> { }; +class ParserTrainGammaInvalid : public ParserTrain, + public ::testing::WithParamInterface> { }; -TEST_P(ParserTrainGammaDeathTest, gamma_explicit_less_or_equal_to_zero) { +TEST_P(ParserTrainGammaInvalid, gamma_explicit_less_or_equal_to_zero) { const auto &[flag, gamma] = GetParam(); // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", gamma), "data.libsvm" }); // create parser_train object - EXPECT_DEATH((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), ::testing::HasSubstr(fmt::format("gamma must be greater than 0.0, but is {}!", gamma))); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } // clang-format off -INSTANTIATE_TEST_SUITE_P(ParserTrainDeathTest, ParserTrainGammaDeathTest, ::testing::Combine( +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainGammaInvalid, ::testing::Combine( ::testing::Values("-g", "--gamma"), ::testing::Values(plssvm::real_type{ -2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.0 })), - naming::pretty_print_parameter_flag_and_value); + naming::pretty_print_parameter_flag_and_value); // clang-format on class ParserTrainCoef0 : public ParserTrain, @@ -339,7 +350,7 @@ TEST_P(ParserTrainCoef0, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", coef0), 
"data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_FLOATING_POINT_EQ(parser.csvm_params.coef0, coef0); } @@ -361,7 +372,7 @@ TEST_P(ParserTrainCost, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", cost), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_FLOATING_POINT_EQ(parser.csvm_params.cost, cost); } @@ -381,7 +392,7 @@ TEST_P(ParserTrainEpsilon, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", eps), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_FLOATING_POINT_EQ(parser.epsilon, eps); } @@ -402,7 +413,7 @@ TEST_P(ParserTrainMaxIter, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", max_iter), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.max_iter, max_iter); } @@ -414,6 +425,24 @@ INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainMaxIter, ::testing::Combine( naming::pretty_print_parameter_flag_and_value); // clang-format on 
+class ParserTrainMaxIterInvalid : public ParserTrain, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserTrainMaxIterInvalid, max_iter_explicit_less_or_equal_to_zero) { + const auto &[flag, max_iter] = GetParam(); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", max_iter), "data.libsvm" }); + // create parameter object + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainMaxIterInvalid, ::testing::Combine( + ::testing::Values("-i", "--max_iter"), + ::testing::Values(-100, -10, -1, 0)), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + class ParserTrainSolver : public ParserTrain, public ::testing::WithParamInterface> { }; @@ -422,7 +451,7 @@ TEST_P(ParserTrainSolver, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", solver), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.solver, solver); } @@ -434,24 +463,6 @@ INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainSolver, ::testing::Combine( naming::pretty_print_parameter_flag_and_value); // clang-format on -class ParserTrainMaxIterDeathTest : public ParserTrain, - public ::testing::WithParamInterface> { }; - -TEST_P(ParserTrainMaxIterDeathTest, max_iter_explicit_less_or_equal_to_zero) { - const auto &[flag, max_iter] = GetParam(); - // create artificial command line arguments in test fixture - this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", max_iter), "data.libsvm" }); - // create 
parameter object - EXPECT_DEATH((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), ::testing::HasSubstr(fmt::format("max_iter must be greater than 0, but is {}!", max_iter))); -} - -// clang-format off -INSTANTIATE_TEST_SUITE_P(ParserTrainDeathTest, ParserTrainMaxIterDeathTest, ::testing::Combine( - ::testing::Values("-i", "--max_iter"), - ::testing::Values(-100, -10, -1, 0)), - naming::pretty_print_parameter_flag_and_value); -// clang-format on - class ParserTrainClassification : public ParserTrain, public ::testing::WithParamInterface> { }; @@ -460,7 +471,7 @@ TEST_P(ParserTrainClassification, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, fmt::format("{}", classification), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.classification, classification); } @@ -482,7 +493,7 @@ TEST_P(ParserTrainBackend, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.backend, backend); } @@ -504,7 +515,7 @@ TEST_P(ParserTrainTargetPlatform, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for 
correctness EXPECT_EQ(parser.target, target_platform); } @@ -528,7 +539,7 @@ TEST_P(ParserTrainSYCLKernelInvocation, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.sycl_kernel_invocation_type, sycl_kernel_invocation_type); } @@ -550,7 +561,7 @@ TEST_P(ParserTrainSYCLImplementation, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.sycl_implementation_type, sycl_implementation_type); } @@ -576,7 +587,7 @@ TEST_P(ParserTrainKokkosExecutionSpace, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.kokkos_execution_space, kokkos_execution_space); } @@ -600,7 +611,7 @@ TEST_P(ParserTrainPerformanceTrackingFilename, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ 
this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.performance_tracking_filename, value); } @@ -614,6 +625,50 @@ INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainPerformanceTrackingFilename, :: #endif // PLSSVM_PERFORMANCE_TRACKER_ENABLED +#if defined(PLSSVM_HAS_MPI_ENABLED) + +class ParserTrainMPILoadBalancingWeights : public ParserTrain, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserTrainMPILoadBalancingWeights, parsing) { + const auto &[flag, value] = GetParam(); + // convert string to std::vector + const std::vector weights{ util::convert_from_string(value) }; + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); + // create parameter object + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; + // test for correctness + EXPECT_EQ(parser.mpi_load_balancing_weights, weights); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainMPILoadBalancingWeights, ::testing::Combine( + ::testing::Values("--mpi_load_balancing_weights"), + ::testing::Values("1", "2")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + +class ParserTrainMPILoadBalancingWeightsInvalid : public ParserTrain, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserTrainMPILoadBalancingWeightsInvalid, parsing) { + const auto &[flag, value] = GetParam(); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); + // create parameter object + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainMPILoadBalancingWeightsInvalid, ::testing::Combine( + 
::testing::Values("--mpi_load_balancing_weights"), + ::testing::Values("1,2", "1,2,3")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + +#endif // PLSSVM_HAS_MPI_ENABLED + class ParserTrainUseStringsAsLabels : public ParserTrain, public ::testing::WithParamInterface> { }; @@ -622,7 +677,7 @@ TEST_P(ParserTrainUseStringsAsLabels, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", fmt::format("{}={}", flag, value), "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(parser.strings_as_labels, value); } @@ -642,7 +697,7 @@ TEST_P(ParserTrainVerbosity, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(fmt::format("{}", plssvm::verbosity), value); } @@ -663,7 +718,7 @@ TEST_P(ParserTrainQuiet, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag, "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // test for correctness EXPECT_EQ(plssvm::verbosity, flag.empty() ? 
old_verbosity : plssvm::verbosity_level::quiet); } @@ -677,63 +732,123 @@ TEST_F(ParserTrainVerbosityAndQuiet, parsing) { // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", "--quiet", "--verbosity", "full", "data.libsvm" }); // create parameter object - const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; // the quiet flag overrides the verbosity flag EXPECT_EQ(plssvm::verbosity, plssvm::verbosity_level::quiet); } -class ParserTrainHelpDeathTest : public ParserTrain, - public ::testing::WithParamInterface { }; +class ParserTrainHelp : public ParserTrain, + public ::testing::WithParamInterface { }; -TEST_P(ParserTrainHelpDeathTest, parsing) { +TEST_P(ParserTrainHelp, parsing) { const std::string &flag = GetParam(); // create artificial command line arguments in test fixture this->CreateCMDArgs({ "./plssvm-train", flag }); // create parameter object - EXPECT_EXIT((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), ::testing::ExitedWithCode(EXIT_SUCCESS), ""); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_SUCCESS)); } -INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainHelpDeathTest, ::testing::Values("-h", "--help"), naming::pretty_print_parameter_flag); +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainHelp, ::testing::Values("-h", "--help"), naming::pretty_print_parameter_flag); -class ParserTrainVersionDeathTest : public ParserTrain, - public ::testing::WithParamInterface { }; +class ParserTrainVersion : public ParserTrain, + public ::testing::WithParamInterface { }; -TEST_P(ParserTrainVersionDeathTest, parsing) { +TEST_P(ParserTrainVersion, parsing) { const std::string &flag = GetParam(); // create artificial command line arguments in 
test fixture this->CreateCMDArgs({ "./plssvm-train", flag }); // create parameter object - EXPECT_EXIT((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), ::testing::ExitedWithCode(EXIT_SUCCESS), ""); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_SUCCESS)); } -INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainVersionDeathTest, ::testing::Values("-v", "--version"), naming::pretty_print_parameter_flag); +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainVersion, ::testing::Values("-v", "--version"), naming::pretty_print_parameter_flag); -TEST_F(ParserTrainDeathTest, no_positional_argument) { +TEST_F(ParserTrain, no_positional_argument) { this->CreateCMDArgs({ "./plssvm-train" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr("ERROR: missing input file!")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); } -TEST_F(ParserTrainDeathTest, too_many_positional_arguments) { +TEST_F(ParserTrain, too_many_positional_arguments) { this->CreateCMDArgs({ "./plssvm-train", "p1", "p2", "p3", "p4" }); - EXPECT_EXIT((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), - ::testing::ExitedWithCode(EXIT_FAILURE), - ::testing::HasSubstr(R"(ERROR: only up to two positional options may be given, but 2 ("p3 p4") additional option(s) where provided!)")); + EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +TEST_F(ParserTrain, unrecognized_option) { + this->CreateCMDArgs({ "./plssvm-train", "--foo", "bar" }); + 
EXPECT_THROW_WHAT((plssvm::detail::cmd::parser_train{ this->get_comm(), this->get_argc(), this->get_argv() }), plssvm::cmd_parser_exit, fmt::format("exit code: {}", EXIT_FAILURE)); +} + +class ParserTrainOutput : public ParserTrain, + public ::testing::WithParamInterface { }; + +TEST_P(ParserTrainOutput, parsing) { + const std::string &flag = GetParam(); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-train", "--kernel_type", flag, "data.libsvm" }); + // create parameter object + const plssvm::detail::cmd::parser_train parser{ this->get_comm(), this->get_argc(), this->get_argv() }; + + const auto get_kernel_function_string = [](const plssvm::kernel_function_type kernel_type) { + switch (kernel_type) { + case plssvm::kernel_function_type::linear: + return "kernel_type: linear -> u'*v\n"; + case plssvm::kernel_function_type::polynomial: + return "kernel_type: polynomial -> (gamma*u'*v+coef0)^degree\n" + "degree: 3\n" + "gamma: \"1 / num_features\"\n" + "coef0: 0\n"; + case plssvm::kernel_function_type::rbf: + return "kernel_type: rbf -> exp(-gamma*|u-v|^2)\n" + "gamma: \"1 / num_features\"\n"; + case plssvm::kernel_function_type::sigmoid: + return "kernel_type: sigmoid -> tanh(gamma*u'*v+coef0)\n" + "gamma: \"1 / num_features\"\n" + "coef0: 0\n"; + case plssvm::kernel_function_type::laplacian: + return "kernel_type: laplacian -> exp(-gamma*|u-v|_1)\n" + "gamma: \"1 / num_features\"\n"; + case plssvm::kernel_function_type::chi_squared: + return "kernel_type: chi_squared -> exp(-gamma*sum_i((x[i]-y[i])^2/(x[i]+y[i])))\n" + "gamma: \"1 / num_features\"\n"; + } + return "unknown"; + }; + + // test output string + std::string correct = fmt::format( + "svm_type: csvc\n" + "{}" + "cost: 1\n" + "epsilon: 1e-10\n" + "max_iter: num_data_points\n" + "backend: automatic\n" + "target platform: automatic\n" + "solver: automatic\n" + "SYCL implementation type: automatic\n" + "SYCL kernel invocation type: automatic\n" + "Kokkos 
execution space: automatic\n" + "classification_type: one vs. all\n" + "label_type: int\n" + "real_type: {}\n" + "input file (data set): 'data.libsvm'\n" + "output file (model): 'data.libsvm.model'\n", + get_kernel_function_string(parser.csvm_params.kernel_type), + std::is_same_v ? "float" : "double"); + + EXPECT_CONVERSION_TO_STRING(parser, correct); } +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainOutput, ::testing::Values("linear", "polynomial", "rbf", "sigmoid", "laplacian", "chi_squared"), naming::pretty_print_parameter_flag); + +class ParserTrainDeathTest : public ParserTrain { }; + // test whether nonsensical cmd arguments trigger the assertions TEST_F(ParserTrainDeathTest, too_few_argc) { - EXPECT_DEATH((plssvm::detail::cmd::parser_train{ 0, nullptr }), + EXPECT_DEATH((plssvm::detail::cmd::parser_train{ this->get_comm(), 0, nullptr }), ::testing::HasSubstr("At least one argument is always given (the executable name), but argc is 0!")); } TEST_F(ParserTrainDeathTest, nullptr_argv) { - EXPECT_DEATH((plssvm::detail::cmd::parser_train{ 1, nullptr }), + EXPECT_DEATH((plssvm::detail::cmd::parser_train{ this->get_comm(), 1, nullptr }), ::testing::HasSubstr("At least one argument is always given (the executable name), but argv is a nullptr!")); } - -TEST_F(ParserTrainDeathTest, unrecognized_option) { - this->CreateCMDArgs({ "./plssvm-train", "--foo", "bar" }); - EXPECT_DEATH((plssvm::detail::cmd::parser_train{ this->get_argc(), this->get_argv() }), ""); -} diff --git a/tests/detail/data_distribution.cpp b/tests/detail/data_distribution.cpp index 994f2a454..c3e488c21 100644 --- a/tests/detail/data_distribution.cpp +++ b/tests/detail/data_distribution.cpp @@ -12,11 +12,16 @@ #include "plssvm/constants.hpp" // plssvm::PADDING_SIZE #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE +#include "tests/utility.hpp" // 
util::redirect_output + +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::ContainsRegex +#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE, ::testing::Test #include // std::is_sorted #include // std::size_t +#include // std::cout, std::endl #include // std::vector using namespace plssvm::detail::literals; @@ -27,7 +32,21 @@ using namespace plssvm::detail::literals; TEST(TriangularDataDistribution, construct) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; + + // test getter + const std::vector dist_vec = dist.distribution(); + EXPECT_EQ(dist_vec.size(), 5); + EXPECT_EQ(dist_vec.front(), 0); // the distribution must start with 0 + EXPECT_EQ(dist_vec.back(), 1024); // the distribution must end with the number of rows + EXPECT_TRUE(std::is_sorted(dist_vec.cbegin(), dist_vec.cend())); // the distribution values must be sorted in ascending order + EXPECT_EQ(dist.num_rows(), 1024); + EXPECT_EQ(dist.num_places(), 4); +} + +TEST(TriangularDataDistribution, construct_with_weights) { + // create a triangular data distribution + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{ std::vector{ std::size_t{ 2 } } }, 1024, 4 }; // test getter const std::vector dist_vec = dist.distribution(); @@ -41,7 +60,7 @@ TEST(TriangularDataDistribution, construct) { TEST(TriangularDataDistribution, place_specific_num_rows) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the place specific number of rows calculation for sanity for (std::size_t place = 0; place < dist.num_places(); ++place) { @@ -51,7 +70,7 @@ TEST(TriangularDataDistribution, place_specific_num_rows) { TEST(TriangularDataDistribution, place_row_offset) { 
// create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the place specific row offset calculation for (std::size_t place = 0; place < dist.num_places(); ++place) { @@ -62,7 +81,7 @@ TEST(TriangularDataDistribution, place_row_offset) { TEST(TriangularDataDistribution, distribution) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the distribution for sanity const std::vector dist_vec = dist.distribution(); @@ -74,7 +93,7 @@ TEST(TriangularDataDistribution, distribution) { TEST(TriangularDataDistribution, distribution_one_place) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 1 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 1 }; // check the distribution for sanity const std::vector dist_vec = dist.distribution(); @@ -85,7 +104,7 @@ TEST(TriangularDataDistribution, distribution_one_place) { TEST(TriangularDataDistribution, distribution_fewer_rows_than_places) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 6, 8 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 6, 8 }; // check the distribution for sanity const std::vector dist_vec = dist.distribution(); @@ -97,15 +116,23 @@ TEST(TriangularDataDistribution, distribution_fewer_rows_than_places) { TEST(TriangularDataDistribution, num_rows) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the number of rows getter 
EXPECT_EQ(dist.num_rows(), 1024); } +TEST(TriangularDataDistribution, total_num_places) { + // create a triangular data distribution + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; + + // check the total number of places getter -> since we only support a single MPI rank, that should be equal to num_places + EXPECT_EQ(dist.total_num_places(), 4); +} + TEST(TriangularDataDistribution, num_places) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the number of places getter EXPECT_EQ(dist.num_places(), 4); @@ -113,7 +140,7 @@ TEST(TriangularDataDistribution, num_places) { TEST(TriangularDataDistribution, calculate_explicit_kernel_matrix_num_entries_padded) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the returned values for (std::size_t place = 0; place < dist.num_places(); ++place) { @@ -124,7 +151,7 @@ TEST(TriangularDataDistribution, calculate_explicit_kernel_matrix_num_entries_pa TEST(TriangularDataDistribution, calculate_maximum_explicit_kernel_matrix_memory_needed_per_place) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the returned values const std::vector ret = dist.calculate_maximum_explicit_kernel_matrix_memory_needed_per_place(128, 32); @@ -135,7 +162,7 @@ TEST(TriangularDataDistribution, calculate_maximum_explicit_kernel_matrix_memory TEST(TriangularDataDistribution, calculate_maximum_explicit_kernel_matrix_memory_allocation_size_per_place) { // create a triangular data distribution - const 
plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the returned values const std::vector ret = dist.calculate_maximum_explicit_kernel_matrix_memory_allocation_size_per_place(128, 32); @@ -146,7 +173,7 @@ TEST(TriangularDataDistribution, calculate_maximum_explicit_kernel_matrix_memory TEST(TriangularDataDistribution, calculate_maximum_implicit_kernel_matrix_memory_needed_per_place) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the returned values const std::vector ret = dist.calculate_maximum_implicit_kernel_matrix_memory_needed_per_place(128, 32); @@ -157,7 +184,7 @@ TEST(TriangularDataDistribution, calculate_maximum_implicit_kernel_matrix_memory TEST(TriangularDataDistribution, calculate_maximum_implicit_kernel_matrix_memory_allocation_size_per_place) { // create a triangular data distribution - const plssvm::detail::triangular_data_distribution dist{ 1024, 4 }; + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the returned values const std::vector ret = dist.calculate_maximum_implicit_kernel_matrix_memory_allocation_size_per_place(128, 32); @@ -166,13 +193,41 @@ TEST(TriangularDataDistribution, calculate_maximum_implicit_kernel_matrix_memory } } +class TriangularDataDistributionCapture : public ::testing::Test, + public util::redirect_output<> { }; + +TEST_F(TriangularDataDistributionCapture, output_operator) { + // create a triangular data distribution + const plssvm::detail::triangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; + + // output the distribution + std::cout << dist; + + // check the captured result + EXPECT_THAT(this->get_capture(), ::testing::ContainsRegex("\\{ num_rows: .*, 
total_num_places: .*, dist: .* \\}")); +} + //*************************************************************************************************************************************// // rectangular data distributions // //*************************************************************************************************************************************// TEST(RectangularDataDistribution, construct) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 4 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; + + // test getter + const std::vector dist_vec = dist.distribution(); + EXPECT_EQ(dist_vec.size(), 5); + EXPECT_EQ(dist_vec.front(), 0); // the distribution must start with 0 + EXPECT_EQ(dist_vec.back(), 1024); // the distribution must end with the number of rows + EXPECT_TRUE(std::is_sorted(dist_vec.cbegin(), dist_vec.cend())); // the distribution values must be sorted in ascending order + EXPECT_EQ(dist.num_rows(), 1024); + EXPECT_EQ(dist.num_places(), 4); +} + +TEST(RectangularDataDistribution, construct_with_weights) { + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{ std::vector{ std::size_t{ 3 } } }, 1024, 4 }; // test getter const std::vector dist_vec = dist.distribution(); @@ -185,8 +240,8 @@ TEST(RectangularDataDistribution, construct) { } TEST(RectangularDataDistribution, place_specific_num_rows) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 4 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the place specific number of rows calculation for sanity for (std::size_t place = 0; place < dist.num_places(); ++place) { @@ -195,8 +250,8 @@ 
TEST(RectangularDataDistribution, place_specific_num_rows) { } TEST(RectangularDataDistribution, place_row_offset) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 4 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the place specific row offset calculation for (std::size_t place = 0; place < dist.num_places(); ++place) { @@ -206,8 +261,8 @@ TEST(RectangularDataDistribution, place_row_offset) { } TEST(RectangularDataDistribution, distribution) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 4 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the distribution for sanity const std::vector dist_vec = dist.distribution(); @@ -218,8 +273,8 @@ TEST(RectangularDataDistribution, distribution) { } TEST(RectangularDataDistribution, distribution_one_place) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 1 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 1 }; // check the distribution for sanity const std::vector dist_vec = dist.distribution(); @@ -229,8 +284,8 @@ TEST(RectangularDataDistribution, distribution_one_place) { } TEST(RectangularDataDistribution, distribution_fewer_rows_than_places) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 6, 8 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 6, 8 }; // check the distribution for sanity const std::vector dist_vec = dist.distribution(); @@ -241,17 +296,39 @@ TEST(RectangularDataDistribution, 
distribution_fewer_rows_than_places) { } TEST(RectangularDataDistribution, num_rows) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 4 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the number of rows getter EXPECT_EQ(dist.num_rows(), 1024); } +TEST(RectangularDataDistribution, total_num_places) { + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; + + // check the total number of places getter -> since we only support a single MPI rank, that should be equal to num_places + EXPECT_EQ(dist.total_num_places(), 4); +} + TEST(RectangularDataDistribution, num_places) { - // create a triangular data distribution - const plssvm::detail::rectangular_data_distribution dist{ 1024, 4 }; + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; // check the number of places getter EXPECT_EQ(dist.num_places(), 4); } + +class RectangularDataDistributionCapture : public ::testing::Test, + public util::redirect_output<> { }; + +TEST_F(RectangularDataDistributionCapture, output_operator) { + // create a rectangular data distribution + const plssvm::detail::rectangular_data_distribution dist{ plssvm::mpi::communicator{}, 1024, 4 }; + + // output the distribution + std::cout << dist; + + // check the captured result + EXPECT_THAT(this->get_capture(), ::testing::ContainsRegex("\\{ num_rows: .*, total_num_places: .*, dist: .* \\}")); +} diff --git a/tests/detail/io/classification_libsvm_model_parsing/data_write.cpp b/tests/detail/io/classification_libsvm_model_parsing/data_write.cpp index fc6424348..295daa5e7 100644 --- a/tests/detail/io/classification_libsvm_model_parsing/data_write.cpp +++ 
b/tests/detail/io/classification_libsvm_model_parsing/data_write.cpp @@ -36,7 +36,18 @@ template class LIBSVMClassificationModelDataWrite : public ::testing::Test, private util::redirect_output<>, - protected util::temporary_file { }; + protected util::temporary_file { + public: + /** + * @brief Return the used MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const plssvm::mpi::communicator get_comm() const noexcept { return comm_; } + + private: + /// The MPI communicator (unused during testing since we do not support MPI runtime tests). + plssvm::mpi::communicator comm_{}; +}; TYPED_TEST_SUITE(LIBSVMClassificationModelDataWrite, util::classification_label_type_classification_type_gtest, naming::test_parameter_to_name); @@ -95,7 +106,7 @@ TYPED_TEST(LIBSVMClassificationModelDataWrite, write) { const plssvm::classification_data_set data_set{ data, std::vector{ label } }; // write the LIBSVM model file - plssvm::detail::io::write_libsvm_model_data_classification(this->filename, params, classification, rho, alpha, index_sets, data_set); + plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), params, classification, rho, alpha, index_sets, data_set); // read the written file plssvm::detail::io::file_reader reader{ this->filename }; @@ -315,7 +326,7 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, empty_filename) { constexpr plssvm::classification_type classification = TestFixture::fixture_classification; // try writing the LIBSVM model header - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification("", this->get_params(), classification, this->get_rho(), this->get_alpha(), this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification("", this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), this->get_index_sets(), this->get_data_set())), "The provided model filename 
must not be empty!"); } @@ -327,7 +338,7 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, missing_labels) { const plssvm::classification_data_set data_set{ util::generate_random_matrix>(plssvm::shape{ 4, 2 }) }; // try writing the LIBSVM model header - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), this->get_index_sets(), data_set)), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), this->get_index_sets(), data_set)), "Cannot write a model file that does not include labels!"); } @@ -338,7 +349,7 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, invalid_number_of_rho_va const std::vector rho = util::generate_random_vector(42); // try writing the LIBSVM model header - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, rho, this->get_alpha(), this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, rho, this->get_alpha(), this->get_index_sets(), this->get_data_set())), ::testing::HasSubstr(fmt::format("The number of rho values is 42 but must be {} ({})!", this->num_classifiers(), classification))); } @@ -349,33 +360,33 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, invalid_alpha_vector) { { // alpha vector too large const std::vector> alpha(2); - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, 
this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), "In case of OAA, the alpha vector may only contain one matrix as entry, but has 2!"); } { // invalid number of rows in matrix const std::vector> alpha{ plssvm::aos_matrix{ plssvm::shape{ 42, 6 } } }; - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), fmt::format("The number of rows in the matrix must be {}, but is 42!", this->num_classifiers(), classification)); } { // invalid number of columns in matrix const std::vector> alpha{ plssvm::aos_matrix{ plssvm::shape{ this->num_classifiers(), 42 } } }; - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), ::testing::HasSubstr("The number of weights (42) must be equal to the number of support vectors (6)!")); } } else if constexpr (classification == plssvm::classification_type::oao) { { // alpha vector too large const std::vector> alpha(42); - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), 
this->get_data_set())), fmt::format("The number of matrices in the alpha vector must contain {} entries, but contains 42 entries!", this->num_classifiers())); } { // invalid matrix shape std::vector> alpha(this->get_alpha()); alpha.back() = plssvm::aos_matrix{ plssvm::shape{ 3, 2 } }; - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), alpha, this->get_index_sets(), this->get_data_set())), "In case of OAO, each matrix may only contain one row!"); } } else { @@ -392,10 +403,10 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, invalid_number_of_index_ // try writing the LIBSVM model header if constexpr (classification == plssvm::classification_type::oaa) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), fmt::format("There shouldn't be any index sets for the OAA classification, but {} were found!", this->get_index_sets().size() - 1)); } else if constexpr (classification == plssvm::classification_type::oao) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, 
this->get_data_set())), ::testing::HasSubstr(fmt::format("The number of index sets ({}) must be equal to the number of different classes ({})!", index_sets.size(), this->get_index_sets().size()))); } else { FAIL() << "Invalid classification_type!"; @@ -411,10 +422,10 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, invalid_number_of_indice // try writing the LIBSVM model header if constexpr (classification == plssvm::classification_type::oaa) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), fmt::format("There shouldn't be any index sets for the OAA classification, but {} were found!", this->get_index_sets().size())); } else if constexpr (classification == plssvm::classification_type::oao) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), "Each data point must have exactly one entry in the index set!"); } else { FAIL() << "Invalid classification_type!"; @@ -430,10 +441,10 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, indices_not_sorted) { // try writing the LIBSVM model header if constexpr (classification == plssvm::classification_type::oaa) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, 
this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), fmt::format("There shouldn't be any index sets for the OAA classification, but {} were found!", this->get_index_sets().size())); } else if constexpr (classification == plssvm::classification_type::oao) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), "All index sets must be sorted in ascending order!"); } else { FAIL() << "Invalid classification_type!"; @@ -449,10 +460,10 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, indices_in_one_index_set // try writing the LIBSVM model header if constexpr (classification == plssvm::classification_type::oaa) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), fmt::format("There shouldn't be any index sets for the OAA classification, but {} were found!", this->get_index_sets().size())); } else if constexpr (classification == plssvm::classification_type::oao) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + 
EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), "All indices in one index set must be unique!"); } else { FAIL() << "Invalid classification_type!"; @@ -468,10 +479,10 @@ TYPED_TEST(LIBSVMClassificationModelDataWriteDeathTest, index_sets_not_disjoint) // try writing the LIBSVM model header if constexpr (classification == plssvm::classification_type::oaa) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), fmt::format("There shouldn't be any index sets for the OAA classification, but {} were found!", this->get_index_sets().size())); } else if constexpr (classification == plssvm::classification_type::oao) { - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_classification(this->filename, this->get_comm(), this->get_params(), classification, this->get_rho(), this->get_alpha(), index_sets, this->get_data_set())), fmt::format("All index sets must be pairwise unique, but index sets 0 and {} share at least one index!", this->get_data_set().num_classes() == 2 ? 
1 : 2)); } else { FAIL() << "Invalid classification_type!"; diff --git a/tests/detail/io/classification_libsvm_model_parsing/header_write.cpp b/tests/detail/io/classification_libsvm_model_parsing/header_write.cpp index 19b2fac9c..1f6794aad 100644 --- a/tests/detail/io/classification_libsvm_model_parsing/header_write.cpp +++ b/tests/detail/io/classification_libsvm_model_parsing/header_write.cpp @@ -34,7 +34,18 @@ template class LIBSVMClassificationModelHeaderWrite : public ::testing::Test, - protected util::temporary_file { }; + protected util::temporary_file { + public: + /** + * @brief Return the used MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const plssvm::mpi::communicator get_comm() const noexcept { return comm_; } + + private: + /// The MPI communicator (unused during testing since we do not support MPI runtime tests). + plssvm::mpi::communicator comm_{}; +}; TYPED_TEST_SUITE(LIBSVMClassificationModelHeaderWrite, util::classification_label_type_gtest, naming::test_parameter_to_name); @@ -55,7 +66,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWrite, write_linear) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set); + const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, data_set); out.close(); // check returned label order @@ -94,7 +105,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWrite, write_polynomial) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set); + const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, 
data_set); out.close(); // check returned label order @@ -136,7 +147,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWrite, write_rbf) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set); + const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, data_set); out.close(); // check returned label order @@ -176,7 +187,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWrite, write_sigmoid) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set); + const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, data_set); out.close(); // check returned label order @@ -217,7 +228,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWrite, write_laplacian) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set); + const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, data_set); out.close(); // check returned label order @@ -257,7 +268,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWrite, write_chi_squared) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set); + const std::vector &label_order = plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, 
data_set); out.close(); // check returned label order @@ -299,7 +310,7 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWriteDeathTest, write_header_without_l fmt::ostream out = fmt::output_file(this->filename); // try writing the LIBSVM model header - EXPECT_DEATH(std::ignore = (plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set)), + EXPECT_DEATH(std::ignore = (plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, data_set)), "Cannot write a model file that does not include labels!"); } @@ -316,6 +327,6 @@ TYPED_TEST(LIBSVMClassificationModelHeaderWriteDeathTest, write_header_invalid_n fmt::ostream out = fmt::output_file(this->filename); // try writing the LIBSVM model header - EXPECT_DEATH(std::ignore = (plssvm::detail::io::write_libsvm_model_header_classification(out, params, rho, data_set)), + EXPECT_DEATH(std::ignore = (plssvm::detail::io::write_libsvm_model_header_classification(out, this->get_comm(), params, rho, data_set)), ::testing::HasSubstr("At least one rho value must be provided!")); } diff --git a/tests/detail/io/regression_libsvm_model_parsing/data_write.cpp b/tests/detail/io/regression_libsvm_model_parsing/data_write.cpp index 1a21ca66c..adc49c81e 100644 --- a/tests/detail/io/regression_libsvm_model_parsing/data_write.cpp +++ b/tests/detail/io/regression_libsvm_model_parsing/data_write.cpp @@ -34,7 +34,18 @@ template class LIBSVMRegressionModelDataWrite : public ::testing::Test, private util::redirect_output<>, - protected util::temporary_file { }; + protected util::temporary_file { + public: + /** + * @brief Return the used MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const plssvm::mpi::communicator get_comm() const noexcept { return comm_; } + + private: + /// The MPI communicator (unused during testing since we do not support MPI runtime tests). 
+ plssvm::mpi::communicator comm_{}; +}; TYPED_TEST_SUITE(LIBSVMRegressionModelDataWrite, util::regression_label_type_gtest, naming::test_parameter_to_name); @@ -52,7 +63,7 @@ TYPED_TEST(LIBSVMRegressionModelDataWrite, write) { const plssvm::regression_data_set data_set{ data, label }; // write the LIBSVM model file - plssvm::detail::io::write_libsvm_model_data_regression(this->filename, params, rho, alpha, data_set); + plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_comm(), params, rho, alpha, data_set); // read the written file plssvm::detail::io::file_reader reader{ this->filename }; @@ -107,7 +118,7 @@ TYPED_TEST(LIBSVMRegressionModelDataWrite, write_without_label) { const plssvm::regression_data_set data_set{ data }; // write the LIBSVM model file - plssvm::detail::io::write_libsvm_model_data_regression(this->filename, params, rho, alpha, data_set); + plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_comm(), params, rho, alpha, data_set); // read the written file plssvm::detail::io::file_reader reader{ this->filename }; @@ -195,7 +206,7 @@ TYPED_TEST_SUITE(LIBSVMRegressionModelDataWriteDeathTest, util::regression_label TYPED_TEST(LIBSVMRegressionModelDataWriteDeathTest, empty_filename) { // try writing the LIBSVM model header - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression("", this->get_params(), this->get_rho(), this->get_alpha(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression("", this->get_comm(), this->get_params(), this->get_rho(), this->get_alpha(), this->get_data_set())), "The provided model filename must not be empty!"); } @@ -204,7 +215,7 @@ TYPED_TEST(LIBSVMRegressionModelDataWriteDeathTest, invalid_number_of_rho_values const std::vector rho = util::generate_random_vector(42); // try writing the LIBSVM model header - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, 
this->get_params(), rho, this->get_alpha(), this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_comm(), this->get_params(), rho, this->get_alpha(), this->get_data_set())), "The number of rho values is 42 but must be exactly 1!"); } @@ -212,19 +223,19 @@ TYPED_TEST(LIBSVMRegressionModelDataWriteDeathTest, invalid_alpha_vector) { { // alpha vector too large const std::vector> alpha(2); - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_params(), this->get_rho(), alpha, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_comm(), this->get_params(), this->get_rho(), alpha, this->get_data_set())), "The alpha vector may only contain one matrix as entry, but has 2!"); } { // invalid number of rows in matrix const std::vector> alpha{ plssvm::aos_matrix{ plssvm::shape{ 42, 6 } } }; - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_params(), this->get_rho(), alpha, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_comm(), this->get_params(), this->get_rho(), alpha, this->get_data_set())), "The number of rows in the matrix must be 1, but is 42!"); } { // invalid number of columns in matrix const std::vector> alpha{ plssvm::aos_matrix{ plssvm::shape{ 1, 42 } } }; - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_params(), this->get_rho(), alpha, this->get_data_set())), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_data_regression(this->filename, this->get_comm(), this->get_params(), this->get_rho(), alpha, this->get_data_set())), ::testing::HasSubstr("The number of weights (42) must be equal to the number of support vectors (6)!")); } } diff --git a/tests/detail/io/regression_libsvm_model_parsing/header_parse_invalid.cpp 
b/tests/detail/io/regression_libsvm_model_parsing/header_parse_invalid.cpp index 23cd66a6d..8deaccd06 100644 --- a/tests/detail/io/regression_libsvm_model_parsing/header_parse_invalid.cpp +++ b/tests/detail/io/regression_libsvm_model_parsing/header_parse_invalid.cpp @@ -12,8 +12,9 @@ #include "plssvm/detail/io/regression_libsvm_model_parsing.hpp" // functions to test #include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_file_format_exception -#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_THROW_WHAT_MATCHER +#include "gmock/gmock.h" // ::testing::HasSubstr #include "gtest/gtest.h" // TEST #include // std::string @@ -268,3 +269,23 @@ TEST(LIBSVMRegressionModelHeaderParseInvalid, too_many_sv_according_to_header) { plssvm::invalid_file_format_exception, "Found 7 support vectors, but it should be 6!"); } + +TEST(LIBSVMRegressionModelHeaderParseInvalid, wrong_nr_class) { + // parse the LIBSVM file + const std::string filename = PLSSVM_TEST_PATH "/data/model/regression/invalid/wrong_nr_class.libsvm.model"; + plssvm::detail::io::file_reader reader{ filename }; + reader.read_lines('#'); + EXPECT_THROW_WHAT_MATCHER(std::ignore = (plssvm::detail::io::parse_libsvm_model_header_regression(reader.lines())), + plssvm::invalid_file_format_exception, + ::testing::HasSubstr("The number of classes (nr_class) is 3, but must be 2!")); +} + +TEST(LIBSVMRegressionModelHeaderParseInvalid, wrong_num_rho) { + // parse the LIBSVM file + const std::string filename = PLSSVM_TEST_PATH "/data/model/regression/invalid/wrong_num_rho.libsvm.model"; + plssvm::detail::io::file_reader reader{ filename }; + reader.read_lines('#'); + EXPECT_THROW_WHAT(std::ignore = (plssvm::detail::io::parse_libsvm_model_header_regression(reader.lines())), + plssvm::invalid_file_format_exception, + "Provided 2 rho values but only one is needed!"); +} diff --git a/tests/detail/io/regression_libsvm_model_parsing/header_write.cpp 
b/tests/detail/io/regression_libsvm_model_parsing/header_write.cpp index 834f7cd82..114aca439 100644 --- a/tests/detail/io/regression_libsvm_model_parsing/header_write.cpp +++ b/tests/detail/io/regression_libsvm_model_parsing/header_write.cpp @@ -30,7 +30,18 @@ template class LIBSVMRegressionModelHeaderWrite : public ::testing::Test, - protected util::temporary_file { }; + protected util::temporary_file { + public: + /** + * @brief Return the used MPI communicator. + * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] const plssvm::mpi::communicator get_comm() const noexcept { return comm_; } + + private: + /// The MPI communicator (unused during testing since we do not support MPI runtime tests). + plssvm::mpi::communicator comm_{}; +}; TYPED_TEST_SUITE(LIBSVMRegressionModelHeaderWrite, util::regression_label_type_gtest, naming::test_parameter_to_name); @@ -48,7 +59,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_linear) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -78,7 +89,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_linear_without_label) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -109,7 +120,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_polynomial) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + 
plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -142,7 +153,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_polynomial_without_label) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -176,7 +187,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_rbf) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -207,7 +218,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_rbf_without_label) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -239,7 +250,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_sigmoid) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -271,7 +282,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_sigmoid_without_label) { // write the LIBSVM model to the temporary file fmt::ostream out = 
fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -304,7 +315,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_laplacian) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -335,7 +346,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_laplacian_without_label) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -367,7 +378,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_chi_squared) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -398,7 +409,7 @@ TYPED_TEST(LIBSVMRegressionModelHeaderWrite, write_chi_squared_without_label) { // write the LIBSVM model to the temporary file fmt::ostream out = fmt::output_file(this->filename); - plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set); + plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set); out.close(); // read the written file @@ -435,6 +446,6 @@ 
TYPED_TEST(LIBSVMRegressionModelHeaderWriteDeathTest, write_header_invalid_numbe fmt::ostream out = fmt::output_file(this->filename); // try writing the LIBSVM model header - EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_header_regression(out, params, rho, data_set)), + EXPECT_DEATH((plssvm::detail::io::write_libsvm_model_header_regression(out, this->get_comm(), params, rho, data_set)), ::testing::HasSubstr("Exactly one rho value must be provided!")); } diff --git a/tests/detail/logging.cpp b/tests/detail/logging/log.cpp similarity index 81% rename from tests/detail/logging.cpp rename to tests/detail/logging/log.cpp index d7f56c8be..57930d587 100644 --- a/tests/detail/logging.cpp +++ b/tests/detail/logging/log.cpp @@ -8,10 +8,11 @@ * @brief Tests for the logging function. */ -#include "plssvm/detail/logging.hpp" +#include "plssvm/detail/logging/log.hpp" #include "tests/utility.hpp" // util::redirect_output +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr #include "gtest/gtest.h" // TEST_F, EXPECT_EQ, EXPECT_TRUE, ::testing::Test class Logger : public ::testing::Test, @@ -72,3 +73,17 @@ TEST_F(Logger, mismatching_verbosity_level) { // there should not be any output EXPECT_TRUE(this->get_capture().empty()); } + +class WarningLogger : public ::testing::Test, + public util::redirect_output<&std::clog> { }; + +TEST_F(WarningLogger, enabled_logging_warning) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log(plssvm::verbosity_level::warning, "WARNING!"); + + // check captured output + EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("WARNING!")); +} diff --git a/tests/detail/logging_without_performance_tracking.cpp b/tests/detail/logging/log_untracked.cpp similarity index 79% rename from tests/detail/logging_without_performance_tracking.cpp rename to tests/detail/logging/log_untracked.cpp index 84e3776dd..e32335b8a 100644 --- 
a/tests/detail/logging_without_performance_tracking.cpp +++ b/tests/detail/logging/log_untracked.cpp @@ -5,15 +5,16 @@ * @license This file is part of the PLSSVM project which is released under the MIT license. * See the LICENSE.md file in the project root for full license information. * - * @brief Tests for the logging function. + * @brief Tests for the logging function that includes tracking. */ -#include "plssvm/detail/logging_without_performance_tracking.hpp" +#include "plssvm/detail/logging/log_untracked.hpp" #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level #include "tests/utility.hpp" // util::redirect_output +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr #include "gtest/gtest.h" // TEST_F, EXPECT_EQ, EXPECT_TRUE, ::testing::Test class LoggerUntracked : public ::testing::Test, @@ -74,3 +75,17 @@ TEST_F(LoggerUntracked, mismatching_verbosity_level) { // there should not be any output EXPECT_TRUE(this->get_capture().empty()); } + +class WarningLoggerUntracked : public ::testing::Test, + public util::redirect_output<&std::clog> { }; + +TEST_F(WarningLoggerUntracked, enabled_logging_warning) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log_untracked(plssvm::verbosity_level::warning, "WARNING!"); + + // check captured output + EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("WARNING!")); +} diff --git a/tests/detail/logging/mpi_log.cpp b/tests/detail/logging/mpi_log.cpp new file mode 100644 index 000000000..dc5d2d5a8 --- /dev/null +++ b/tests/detail/logging/mpi_log.cpp @@ -0,0 +1,91 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ * + * @brief Tests for the MPI logging function. + */ + +#include "plssvm/detail/logging/mpi_log.hpp" + +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator + +#include "tests/utility.hpp" // util::redirect_output + +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr +#include "gtest/gtest.h" // TEST_F, EXPECT_EQ, EXPECT_TRUE, ::testing::Test + +class MPILogger : public ::testing::Test, + public util::redirect_output<> { }; + +TEST_F(MPILogger, enabled_logging) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "Hello, World!"); + + // check captured output + EXPECT_EQ(this->get_capture(), "Hello, World!"); +} + +TEST_F(MPILogger, enabled_logging_with_args) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "int: {}, float: {}, str: {}", 42, 1.5, "abc"); + + // check captured output + EXPECT_EQ(this->get_capture(), "int: 42, float: 1.5, str: abc"); +} + +TEST_F(MPILogger, disabled_logging) { + // explicitly disable logging + plssvm::verbosity = plssvm::verbosity_level::quiet; + + // log message + plssvm::detail::log(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "Hello, World!"); + + // since logging has been disabled, nothing should have been captured + EXPECT_TRUE(this->get_capture().empty()); +} + +TEST_F(MPILogger, disabled_logging_with_args) { + // explicitly disable logging + plssvm::verbosity = plssvm::verbosity_level::quiet; + + // log message + plssvm::detail::log(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "int: {}, float: {}, str: {}", 42, 1.5, "abc"); + + // since logging has been disabled, nothing should have been captured + EXPECT_TRUE(this->get_capture().empty()); +} + +TEST_F(MPILogger, mismatching_verbosity_level) { + // 
set verbosity_level to libsvm + plssvm::verbosity = plssvm::verbosity_level::libsvm; + + // log message with full + plssvm::detail::log(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "Hello, World!"); + plssvm::detail::log(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "int: {}, float: {}, str: {}", 42, 1.5, "abc"); + + // there should not be any output + EXPECT_TRUE(this->get_capture().empty()); +} + +class WarningMPILogger : public ::testing::Test, + public util::redirect_output<&std::clog> { }; + +TEST_F(WarningMPILogger, enabled_logging_warning) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log(plssvm::verbosity_level::warning, plssvm::mpi::communicator{}, "WARNING!"); + + // check captured output + EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("WARNING!")); +} diff --git a/tests/detail/logging/mpi_log_untracked.cpp b/tests/detail/logging/mpi_log_untracked.cpp new file mode 100644 index 000000000..ec1be7a5d --- /dev/null +++ b/tests/detail/logging/mpi_log_untracked.cpp @@ -0,0 +1,92 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the logging function that includes tracking. 
+ */ + +#include "plssvm/detail/logging/mpi_log_untracked.hpp" + +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level + +#include "tests/utility.hpp" // util::redirect_output + +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr +#include "gtest/gtest.h" // TEST_F, EXPECT_EQ, EXPECT_TRUE, ::testing::Test + +class MPILoggerUntracked : public ::testing::Test, + public util::redirect_output<> { }; + +TEST_F(MPILoggerUntracked, enabled_logging) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log_untracked(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "Hello, World!"); + + // check captured output + EXPECT_EQ(this->get_capture(), "Hello, World!"); +} + +TEST_F(MPILoggerUntracked, enabled_logging_with_args) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log_untracked(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "int: {}, float: {}, str: {}", 42, 1.5, "abc"); + + // check captured output + EXPECT_EQ(this->get_capture(), "int: 42, float: 1.5, str: abc"); +} + +TEST_F(MPILoggerUntracked, disabled_logging) { + // explicitly disable logging + plssvm::verbosity = plssvm::verbosity_level::quiet; + + // log message + plssvm::detail::log_untracked(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "Hello, World!"); + + // since logging has been disabled, nothing should have been captured + EXPECT_TRUE(this->get_capture().empty()); +} + +TEST_F(MPILoggerUntracked, disabled_logging_with_args) { + // explicitly disable logging + plssvm::verbosity = plssvm::verbosity_level::quiet; + + // log message + plssvm::detail::log_untracked(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "int: {}, float: {}, str: {}", 42, 1.5, "abc"); + + // since logging has been disabled, nothing 
should have been captured + EXPECT_TRUE(this->get_capture().empty()); +} + +TEST_F(MPILoggerUntracked, mismatching_verbosity_level) { + // set verbosity_level to libsvm + plssvm::verbosity = plssvm::verbosity_level::libsvm; + + // log message with full + plssvm::detail::log_untracked(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "Hello, World!"); + plssvm::detail::log_untracked(plssvm::verbosity_level::full, plssvm::mpi::communicator{}, "int: {}, float: {}, str: {}", 42, 1.5, "abc"); + + // there should not be any output + EXPECT_TRUE(this->get_capture().empty()); +} + +class WarningMPILoggerUntracked : public ::testing::Test, + public util::redirect_output<&std::clog> { }; + +TEST_F(WarningMPILoggerUntracked, enabled_logging_warning) { + // explicitly enable logging + plssvm::verbosity = plssvm::verbosity_level::full; + + // log a message + plssvm::detail::log_untracked(plssvm::verbosity_level::warning, plssvm::mpi::communicator{}, "WARNING!"); + + // check captured output + EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("WARNING!")); +} diff --git a/tests/detail/move_only_any.cpp b/tests/detail/move_only_any.cpp index 11d23b673..0202b63e8 100644 --- a/tests/detail/move_only_any.cpp +++ b/tests/detail/move_only_any.cpp @@ -22,7 +22,8 @@ #include // std::vector TEST(BadMoveOnlyCastException, exception) { - EXPECT_THROW_WHAT(throw plssvm::detail::bad_move_only_any_cast{}, plssvm::detail::bad_move_only_any_cast, "plssvm::detail::bad_move_only_any_cast"); + const auto dummy = []() { throw plssvm::detail::bad_move_only_any_cast{}; }; + EXPECT_THROW_WHAT(dummy(), plssvm::detail::bad_move_only_any_cast, "plssvm::detail::bad_move_only_any_cast"); } TEST(MoveOnlyAny, default_construct) { @@ -231,6 +232,12 @@ TEST(MoveOnlyAny, cast_const_pointer) { EXPECT_EQ(*ptr, 42); } +TEST(MoveOnlyAny, cast_const_nullptr_pointer) { + // casting a nullptr should return a nullptr + const plssvm::detail::move_only_any *a{ nullptr }; + 
EXPECT_EQ(plssvm::detail::move_only_any_cast(a), nullptr); +} + TEST(MoveOnlyAny, cast_const_pointer_wrong_type) { // create const move_only_any object const plssvm::detail::move_only_any a{ 42 }; @@ -246,6 +253,12 @@ TEST(MoveOnlyAny, cast_pointer) { EXPECT_EQ(*plssvm::detail::move_only_any_cast(&a), 42); } +TEST(MoveOnlyAny, cast_nullptr_pointer) { + // casting a nullptr should return a nullptr + plssvm::detail::move_only_any *a{ nullptr }; + EXPECT_EQ(plssvm::detail::move_only_any_cast(a), nullptr); +} + TEST(MoveOnlyAny, cast_pointer_wrong_type) { // create const move_only_any object plssvm::detail::move_only_any a{ 42 }; diff --git a/tests/detail/tracking/events.cpp b/tests/detail/tracking/events.cpp index 293f019a9..9c9baad82 100644 --- a/tests/detail/tracking/events.cpp +++ b/tests/detail/tracking/events.cpp @@ -212,6 +212,17 @@ TEST_F(Events, generate_yaml_string) { EXPECT_EQ(yaml, correct_yaml); } +TEST_F(Events, generate_yaml_string_no_events) { + // create events wrapper + plssvm::detail::tracking::events events{}; + + // get the YAML string + const std::string yaml = events.generate_yaml_string(std::chrono::steady_clock::now()); + + // check for equality + EXPECT_EQ(yaml, std::string{}); +} + TEST_F(Events, output_operator) { const std::chrono::steady_clock::time_point time1 = std::chrono::steady_clock::now(); const std::chrono::steady_clock::time_point time2 = std::chrono::steady_clock::now(); diff --git a/tests/detail/tracking/performance_tracker.cpp b/tests/detail/tracking/performance_tracker.cpp index 560aae542..31a0c3d83 100644 --- a/tests/detail/tracking/performance_tracker.cpp +++ b/tests/detail/tracking/performance_tracker.cpp @@ -16,13 +16,14 @@ #include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size (literals) #include "plssvm/detail/tracking/events.hpp" // plssvm::detail::tracking::{events, event} +#include "plssvm/mpi/communicator.hpp" // 
plssvm::mpi::communicator #include "tests/naming.hpp" // naming::test_parameter_to_name #include "tests/types_to_test.hpp" // util::{label_type_gtest, test_parameter_type_at_t} #include "tests/utility.hpp" // util::redirect_output #include "fmt/format.h" // fmt::format -#include "gmock/gmock.h" // EXPECT_CALL, EXPECT_THAT, ::testing::{HasSubstr} +#include "gmock/gmock.h" // EXPECT_CALL, EXPECT_THAT, ::testing::{HasSubstr, ContainsRegex} #include "gtest/gtest.h" // TEST, TYPED_TEST_SUITE, TYPED_TEST, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE, ::testing::Test, ::testing::An #include // std::transform @@ -109,6 +110,128 @@ class PerformanceTracker : public ::testing::Test, plssvm::detail::tracking::performance_tracker tracker_{}; }; +TEST_F(PerformanceTracker, copy_construct) { + // get performance tracker from fixture class + plssvm::detail::tracking::performance_tracker &tracker = this->get_performance_tracker(); + + // add different tracking entries + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "bar", 42 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "baz", 3.1415 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "mem", 1_KiB }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'a' }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'b' }); + + // copy-construct new performance tracker + const plssvm::detail::tracking::performance_tracker tracker2{ tracker }; + + // check the contents + EXPECT_EQ(tracker2.get_tracking_entries(), tracker.get_tracking_entries()); + ASSERT_EQ(tracker2.get_events().num_events(), tracker.get_events().num_events()); + for (std::size_t i = 0; i < tracker.get_events().num_events(); ++i) { + EXPECT_EQ(tracker2.get_events()[i].time_point, tracker.get_events()[i].time_point); + EXPECT_EQ(tracker2.get_events()[i].name, tracker.get_events()[i].name); + } + 
EXPECT_EQ(tracker2.get_reference_time(), tracker.get_reference_time()); + EXPECT_EQ(tracker2.is_tracking(), tracker.is_tracking()); +} + +TEST_F(PerformanceTracker, move_construct) { + // get performance tracker from fixture class + plssvm::detail::tracking::performance_tracker &tracker = this->get_performance_tracker(); + + // add different tracking entries + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "bar", 42 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "baz", 3.1415 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "mem", 1_KiB }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'a' }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'b' }); + + // save (i.e. copy contents as ground truth) + const auto entries = tracker.get_tracking_entries(); + const auto events = tracker.get_events(); + const auto reference_time = tracker.get_reference_time(); + const bool is_tracking = tracker.is_tracking(); + + // move-construct new performance tracker + const plssvm::detail::tracking::performance_tracker tracker2{ std::move(tracker) }; + + // check the contents + EXPECT_EQ(tracker2.get_tracking_entries(), entries); + ASSERT_EQ(tracker2.get_events().num_events(), events.num_events()); + for (std::size_t i = 0; i < events.num_events(); ++i) { + EXPECT_EQ(tracker2.get_events()[i].time_point, events[i].time_point); + EXPECT_EQ(tracker2.get_events()[i].name, events[i].name); + } + EXPECT_EQ(tracker2.get_reference_time(), reference_time); + EXPECT_EQ(tracker2.is_tracking(), is_tracking); + + // check moved-from state + EXPECT_TRUE(tracker.get_tracking_entries().empty()); + EXPECT_TRUE(tracker.get_events().empty()); +} + +TEST_F(PerformanceTracker, copy_assign) { + // get performance tracker from fixture class + plssvm::detail::tracking::performance_tracker &tracker = this->get_performance_tracker(); + + // 
add different tracking entries + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "bar", 42 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "baz", 3.1415 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "mem", 1_KiB }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'a' }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'b' }); + + // default-construct new performance tracker then copy-assign another performance tracker + plssvm::detail::tracking::performance_tracker tracker2{}; + tracker2 = tracker; + + // check the contents + EXPECT_EQ(tracker2.get_tracking_entries(), tracker.get_tracking_entries()); + ASSERT_EQ(tracker2.get_events().num_events(), tracker.get_events().num_events()); + for (std::size_t i = 0; i < tracker.get_events().num_events(); ++i) { + EXPECT_EQ(tracker2.get_events()[i].time_point, tracker.get_events()[i].time_point); + EXPECT_EQ(tracker2.get_events()[i].name, tracker.get_events()[i].name); + } + EXPECT_EQ(tracker2.get_reference_time(), tracker.get_reference_time()); + EXPECT_EQ(tracker2.is_tracking(), tracker.is_tracking()); +} + +TEST_F(PerformanceTracker, move_assign) { + // get performance tracker from fixture class + plssvm::detail::tracking::performance_tracker &tracker = this->get_performance_tracker(); + + // add different tracking entries + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "bar", 42 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "baz", 3.1415 }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "mem", 1_KiB }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'a' }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'b' }); + + // save (i.e. 
copy contents as ground truth) + const auto entries = tracker.get_tracking_entries(); + const auto events = tracker.get_events(); + const auto reference_time = tracker.get_reference_time(); + const bool is_tracking = tracker.is_tracking(); + + // default-construct new performance tracker then move-assign another performance tracker + plssvm::detail::tracking::performance_tracker tracker2{}; + tracker2 = std::move(tracker); + + // check the contents + EXPECT_EQ(tracker2.get_tracking_entries(), entries); + ASSERT_EQ(tracker2.get_events().num_events(), events.num_events()); + for (std::size_t i = 0; i < events.num_events(); ++i) { + EXPECT_EQ(tracker2.get_events()[i].time_point, events[i].time_point); + EXPECT_EQ(tracker2.get_events()[i].name, events[i].name); + } + EXPECT_EQ(tracker2.get_reference_time(), reference_time); + EXPECT_EQ(tracker2.is_tracking(), is_tracking); + + // check moved-from state + EXPECT_TRUE(tracker.get_tracking_entries().empty()); + EXPECT_TRUE(tracker.get_events().empty()); +} + // the macros are only available if PLSSVM_PERFORMANCE_TRACKER_ENABLED is defined! 
#if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) @@ -330,7 +453,7 @@ TEST_F(PerformanceTracker, add_parser_train_tracking_entry) { std::transform(input_argv.begin(), input_argv.end(), argv.begin(), [](std::string &str) { return str.data(); }); const auto argc = static_cast(argv.size()); - const plssvm::detail::cmd::parser_train parser{ argc, argv.data() }; + const plssvm::detail::cmd::parser_train parser{ plssvm::mpi::communicator{}, argc, argv.data() }; // save cmd::parser_train entry tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "parameter", "", parser }); @@ -354,7 +477,7 @@ TEST_F(PerformanceTracker, add_parser_predict_tracking_entry) { std::transform(input_argv.begin(), input_argv.end(), argv.begin(), [](std::string &str) { return str.data(); }); const auto argc = static_cast(argv.size()); - const plssvm::detail::cmd::parser_predict parser{ argc, argv.data() }; + const plssvm::detail::cmd::parser_predict parser{ plssvm::mpi::communicator{}, argc, argv.data() }; // save cmd::parser_predict entry tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "parameter", "", parser }); @@ -378,7 +501,7 @@ TEST_F(PerformanceTracker, add_parser_scale_tracking_entry) { std::transform(input_argv.begin(), input_argv.end(), argv.begin(), [](std::string &str) { return str.data(); }); const auto argc = static_cast(argv.size()); - const plssvm::detail::cmd::parser_scale parser{ argc, argv.data() }; + const plssvm::detail::cmd::parser_scale parser{ plssvm::mpi::communicator{}, argc, argv.data() }; // save cmd::parser_scale entry tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "parameter", "", parser }); @@ -456,6 +579,8 @@ TEST_F(PerformanceTracker, save_entries_to_file) { tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "mem", 1_KiB }); tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'a' }); tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ 
"", "foobar", 'b' }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "dependencies", "backend", "one" }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "dependencies", "backend", "two" }); tracker.save(tmp_file.filename); // the file must not be empty @@ -471,9 +596,11 @@ TEST_F(PerformanceTracker, save_entries_to_file) { EXPECT_THAT(reader.buffer(), ::testing::HasSubstr("baz: 3.1415")); EXPECT_THAT(reader.buffer(), ::testing::HasSubstr("mem: 1024")); EXPECT_THAT(reader.buffer(), ::testing::HasSubstr("foobar: [a, b]")); + EXPECT_THAT(reader.buffer(), ::testing::HasSubstr("dependencies:")); + EXPECT_THAT(reader.buffer(), ::testing::ContainsRegex("backend: .*one, two")); // the tracking entries must not have changed - EXPECT_EQ(tracker.get_tracking_entries().size(), 2); + EXPECT_EQ(tracker.get_tracking_entries().size(), 3); } TEST_F(PerformanceTracker, save_entries_empty_file) { @@ -486,6 +613,8 @@ TEST_F(PerformanceTracker, save_entries_empty_file) { tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "foo", "mem", 1_KiB }); tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'a' }); tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "", "foobar", 'b' }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "dependencies", "backend", "one" }); + tracker.add_tracking_entry(plssvm::detail::tracking::tracking_entry{ "dependencies", "backend", "two" }); // save to empty file, i.e., dump the performance tracking entries to std::clog tracker.save(""); @@ -498,9 +627,11 @@ TEST_F(PerformanceTracker, save_entries_empty_file) { EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("baz: 3.1415")); EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("mem: 1024")); EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("foobar: [a, b]")); + EXPECT_THAT(this->get_capture(), ::testing::HasSubstr("dependencies:")); + 
EXPECT_THAT(this->get_capture(), ::testing::ContainsRegex("backend: .*one, two")); // the tracking entries must not have changed - EXPECT_EQ(tracker.get_tracking_entries().size(), 2); + EXPECT_EQ(tracker.get_tracking_entries().size(), 3); } TEST_F(PerformanceTracker, get_tracking_entries) { diff --git a/tests/environment.cpp b/tests/environment.cpp new file mode 100644 index 000000000..1ca3ab44d --- /dev/null +++ b/tests/environment.cpp @@ -0,0 +1,157 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for functions related to the environment setup and teardown. + */ + +#include "plssvm/environment.hpp" + +#include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::list_available_backends +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/exceptions/exceptions.hpp" // plssvm::environment_exception + +#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_CONVERSION_FROM_STRING, EXPECT_THROW_WHAT + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_NE, EXPECT_DEATH + +#include // std::ignore +#include // std::vector + +// check whether the plssvm::environment::status -> std::string conversions are correct +TEST(EnvironmentStatus, to_string) { + // check conversions to std::string + EXPECT_CONVERSION_TO_STRING(plssvm::environment::status::uninitialized, "uninitialized"); + EXPECT_CONVERSION_TO_STRING(plssvm::environment::status::initialized, "initialized"); + EXPECT_CONVERSION_TO_STRING(plssvm::environment::status::finalized, "finalized"); + EXPECT_CONVERSION_TO_STRING(plssvm::environment::status::unnecessary, "unnecessary"); +} + +TEST(EnvironmentStatus, to_string_unknown) { + // check conversions to std::string from unknown 
environment status + EXPECT_CONVERSION_TO_STRING(static_cast(4), "unknown"); +} + +// check whether the std::string -> plssvm::environment::status conversions are correct +TEST(EnvironmentStatus, from_string) { + // check conversion from std::string + EXPECT_CONVERSION_FROM_STRING("uninitialized", plssvm::environment::status::uninitialized); + EXPECT_CONVERSION_FROM_STRING("UNINITIALIZED", plssvm::environment::status::uninitialized); + EXPECT_CONVERSION_FROM_STRING("initialized", plssvm::environment::status::initialized); + EXPECT_CONVERSION_FROM_STRING("INITIALIZED", plssvm::environment::status::initialized); + EXPECT_CONVERSION_FROM_STRING("finalized", plssvm::environment::status::finalized); + EXPECT_CONVERSION_FROM_STRING("FINALIZED", plssvm::environment::status::finalized); + EXPECT_CONVERSION_FROM_STRING("unnecessary", plssvm::environment::status::unnecessary); + EXPECT_CONVERSION_FROM_STRING("UNNECESSARY", plssvm::environment::status::unnecessary); +} + +TEST(EnvironmentStatus, from_string_unknown) { + // foo isn't a valid environment status + std::istringstream input{ "foo" }; + plssvm::environment::status status{}; + input >> status; + EXPECT_TRUE(input.fail()); +} + +TEST(Environment, get_backend_status) { + // check the backend statis for all supported backends + + // the automatic backend may not be used and throws an exception + EXPECT_THROW_WHAT(std::ignore = plssvm::environment::get_backend_status(plssvm::backend_type::automatic), plssvm::environment_exception, "Can't retrieve the environment status for the automatic backend!"); + + // must be always status::unnecessary for the following backends + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::openmp), plssvm::environment::status::unnecessary); + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::stdpar), plssvm::environment::status::unnecessary); + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::cuda), 
plssvm::environment::status::unnecessary); + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::hip), plssvm::environment::status::unnecessary); + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::opencl), plssvm::environment::status::unnecessary); + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::sycl), plssvm::environment::status::unnecessary); + + // HPX and Kokkos need some form of initialization IF THEY ARE ENABLED +#if defined(PLSSVM_HAS_HPX_BACKEND) + EXPECT_NE(plssvm::environment::get_backend_status(plssvm::backend_type::hpx), plssvm::environment::status::unnecessary); +#else + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::hpx), plssvm::environment::status::unnecessary); +#endif + +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + EXPECT_NE(plssvm::environment::get_backend_status(plssvm::backend_type::kokkos), plssvm::environment::status::unnecessary); +#else + EXPECT_EQ(plssvm::environment::get_backend_status(plssvm::backend_type::kokkos), plssvm::environment::status::unnecessary); +#endif +} + +TEST(EnvironmentDeathTest, initialize_backend) { + // the function may never be called with the automatic backend + EXPECT_DEATH(plssvm::environment::detail::initialize_backend(plssvm::backend_type::automatic), "The automatic backend may never be initialized!"); +} + +TEST(EnvironmentDeathTest, finalize_backend) { + // the function may never be called with the automatic backend + EXPECT_DEATH(plssvm::environment::detail::finalize_backend(plssvm::backend_type::automatic), "The automatic backend may never be finalized!"); +} + +TEST(Environment, initialize_impl) { + // the function may never be called with a backend that hasn't been enabled + const std::vector all_backends{ + plssvm::backend_type::openmp, + plssvm::backend_type::stdpar, + plssvm::backend_type::hpx, + plssvm::backend_type::cuda, + plssvm::backend_type::hip, + plssvm::backend_type::opencl, + plssvm::backend_type::sycl, 
+ plssvm::backend_type::kokkos + }; + const std::vector available_backends = plssvm::list_available_backends(); + + // iterate over all backends and check whether it is available + for (const plssvm::backend_type backend : all_backends) { + if (!plssvm::detail::contains(available_backends, backend)) { + // backend is not available -> it cannot be initialized + EXPECT_THROW_WHAT(plssvm::environment::detail::initialize_impl(std::vector{ backend }), + plssvm::environment_exception, + fmt::format("The provided backend {} is currently not available and, therefore, can't be initialized! Available backends are: [{}].", backend, fmt::join(available_backends, ", "))); + } + } +} + +TEST(Environment, initialize_impl_automatic) { + // the function may never be called with the automatic backend + const std::vector backends{ plssvm::backend_type::automatic }; + EXPECT_THROW_WHAT(plssvm::environment::detail::initialize_impl(backends), plssvm::environment_exception, "The automatic backend cannot be initialized!"); +} + +TEST(Environment, finalize) { + // the function may never be called with a backend that hasn't been enabled + const std::vector all_backends{ + plssvm::backend_type::openmp, + plssvm::backend_type::stdpar, + plssvm::backend_type::hpx, + plssvm::backend_type::cuda, + plssvm::backend_type::hip, + plssvm::backend_type::opencl, + plssvm::backend_type::sycl, + plssvm::backend_type::kokkos + }; + const std::vector available_backends = plssvm::list_available_backends(); + + // iterate over all backends and check whether it is available + for (const plssvm::backend_type backend : all_backends) { + if (!plssvm::detail::contains(available_backends, backend)) { + // backend is not available -> it cannot be initialized + EXPECT_THROW_WHAT(plssvm::environment::finalize(std::vector{ backend }), + plssvm::environment_exception, + fmt::format("The provided backend {} is currently not available and, therefore, can't be finalized! 
Available backends are: [{}].", backend, fmt::join(available_backends, ", "))); + } + } +} + +TEST(Environment, finalize_automatic) { + // the function may never be called with the automatic backend + const std::vector backends{ plssvm::backend_type::automatic }; + EXPECT_THROW_WHAT(plssvm::environment::finalize(backends), plssvm::environment_exception, "The automatic backend cannot be finalized!"); +} diff --git a/tests/exceptions/exceptions.cpp b/tests/exceptions/exceptions.cpp index a061eddeb..69bf8d9d3 100644 --- a/tests/exceptions/exceptions.cpp +++ b/tests/exceptions/exceptions.cpp @@ -37,7 +37,8 @@ using exception_types_list = std::tuple; + plssvm::classification_report_exception, plssvm::regression_report_exception, plssvm::platform_devices_empty, plssvm::environment_exception, + plssvm::mpi_exception>; using exception_types_gtest = util::combine_test_parameters_gtest_t>; // clang-format on @@ -90,3 +91,46 @@ TYPED_TEST(Exceptions, exception_what_with_source_location) { EXPECT_THAT(std::string{ what_lines[3] }, ::testing::ContainsRegex(" in function .*dummy.*")); EXPECT_THAT(std::string{ what_lines[4] }, ::testing::StartsWith(" @ line ")); // attention: some line must be given, hardcoded value not feasible } + +// helper function returning an exception used to be able to name the source location function +plssvm::cmd_parser_exit dummy_exit(const int exit_code) { + return plssvm::cmd_parser_exit{ exit_code }; +} + +// check whether throwing exceptions works as intended +TEST(CMDParserExitException, throwing_excpetion) { + // throw the specified exception + const auto dummy_exit = []() { throw plssvm::cmd_parser_exit{ 1 }; }; + EXPECT_THROW_WHAT(dummy_exit(), plssvm::cmd_parser_exit, "exit code: 1"); +} + +// check whether the source location information are populated correctly +TEST(CMDParserExitException, exception_source_location) { + const auto exc = dummy_exit(2); + + EXPECT_EQ(exc.loc().file_name(), std::string{ __builtin_FILE() }); + 
EXPECT_THAT(exc.loc().function_name(), ::testing::HasSubstr("dummy_exit")); + EXPECT_GT(exc.loc().line(), std::uint_least32_t{ 0 }); // attention: some line must be given, hardcoded value not feasible + EXPECT_EQ(exc.loc().column(), std::uint_least32_t{ 0 }); // attention: always 0! + EXPECT_EQ(exc.exit_code(), 2); +} + +// check whether what message including the source location information is assembled correctly +TEST(CMDParserExitException, exception_what_with_source_location) { + const auto exc = dummy_exit(0); + + // get exception message with source location information split into a vector of separate lines + const std::string what = exc.what_with_loc(); + const std::vector what_lines = plssvm::detail::split(what, '\n'); + + // check the number of lines in the "what" message + ASSERT_EQ(what_lines.size(), 5); + + // check the "what" message content + EXPECT_EQ(what_lines[0], std::string{ "exit code: 0" }); + EXPECT_EQ(what_lines[1], fmt::format("{} thrown:", util::exception_type_name())); + EXPECT_EQ(what_lines[2], fmt::format(" in file {}", __builtin_FILE())); + EXPECT_THAT(std::string{ what_lines[3] }, ::testing::ContainsRegex(" in function .*dummy_exit.*")); + EXPECT_THAT(std::string{ what_lines[4] }, ::testing::StartsWith(" @ line ")); // attention: some line must be given, hardcoded value not feasible + EXPECT_EQ(exc.exit_code(), 0); +} diff --git a/tests/exceptions/source_location.cpp b/tests/exceptions/source_location.cpp index 9e91b39ad..1e8bc2e25 100644 --- a/tests/exceptions/source_location.cpp +++ b/tests/exceptions/source_location.cpp @@ -10,30 +10,45 @@ #include "plssvm/exceptions/source_location.hpp" // plssvm::source_location +#include "plssvm/mpi/environment.hpp" // plssvm::mpi::is_active + #include "gmock/gmock.h" // EXPECT_THAT, ::testing::HasSubstr #include "gtest/gtest.h" // TEST, EXPECT_EQ #include // std::uint_least32_t // dummy function to be able to specify the function name -constexpr plssvm::source_location dummy() { +[[nodiscard]] 
plssvm::source_location dummy() { return plssvm::source_location::current(); } TEST(SourceLocation, default_construct) { - constexpr plssvm::source_location loc{}; + const plssvm::source_location loc{}; EXPECT_EQ(loc.file_name(), std::string{ "unknown" }); EXPECT_EQ(loc.function_name(), std::string{ "unknown" }); EXPECT_EQ(loc.line(), std::uint_least32_t{ 0 }); EXPECT_EQ(loc.column(), std::uint_least32_t{ 0 }); + EXPECT_FALSE(loc.world_rank().has_value()); } TEST(SourceLocation, current_location) { - constexpr plssvm::source_location loc = dummy(); + const plssvm::source_location loc = dummy(); EXPECT_EQ(loc.file_name(), __builtin_FILE()); EXPECT_THAT(loc.function_name(), ::testing::HasSubstr("dummy")); - EXPECT_EQ(loc.line(), std::uint_least32_t{ 20 }); // attention: hardcoded line! + EXPECT_EQ(loc.line(), std::uint_least32_t{ 22 }); // attention: hardcoded line! EXPECT_EQ(loc.column(), std::uint_least32_t{ 0 }); // attention: always 0! + + if (plssvm::mpi::is_active()) { +#if defined(PLSSVM_HAS_MPI_ENABLED) + ASSERT_TRUE(loc.world_rank().has_value()); + // since MPI is disabled, the world rank must always be zero + EXPECT_EQ(loc.world_rank().value(), 0); +#else + EXPECT_FALSE(loc.world_rank().has_value()); +#endif + } else { + EXPECT_FALSE(loc.world_rank().has_value()); + } } diff --git a/tests/exceptions/utility.hpp b/tests/exceptions/utility.hpp index 52aaa14c7..324f2b289 100644 --- a/tests/exceptions/utility.hpp +++ b/tests/exceptions/utility.hpp @@ -41,6 +41,7 @@ template // create exception type -> string mapping for all custom exception types PLSSVM_CREATE_EXCEPTION_TYPE_NAME(exception) +PLSSVM_CREATE_EXCEPTION_TYPE_NAME(cmd_parser_exit) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(invalid_parameter_exception) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(file_reader_exception) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(data_set_exception) @@ -53,8 +54,10 @@ PLSSVM_CREATE_EXCEPTION_TYPE_NAME(gpu_device_ptr_exception) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(matrix_exception) 
PLSSVM_CREATE_EXCEPTION_TYPE_NAME(kernel_launch_resources) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(classification_report_exception) +PLSSVM_CREATE_EXCEPTION_TYPE_NAME(regression_report_exception) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(platform_devices_empty) PLSSVM_CREATE_EXCEPTION_TYPE_NAME(environment_exception) +PLSSVM_CREATE_EXCEPTION_TYPE_NAME(mpi_exception) } // namespace util diff --git a/tests/gamma.cpp b/tests/gamma.cpp index 3b5b4a44b..15e1b9f84 100644 --- a/tests/gamma.cpp +++ b/tests/gamma.cpp @@ -131,6 +131,19 @@ TEST(GammaType, calculate_gamma_value_gamma_coefficient_type_scale) { EXPECT_FLOATING_POINT_NEAR(plssvm::calculate_gamma_value(gamma_value, matr), plssvm::real_type{ 0.047505938242280283668 }); } +TEST(GammaType, calculate_gamma_value_invalid_gamma_coefficient_type) { + // create a gamma_type with a real_type value + const plssvm::gamma_type gamma_value = static_cast(2); + + // create a dummy matrix representing the actual data + const auto matr = util::generate_specific_matrix>(plssvm::shape{ 8, 4 }); + + // the std::variant must hold the gamma_coefficient_type member + ASSERT_TRUE(std::holds_alternative(gamma_value)); + // check the variant value -> must be 1.0 for invalid gamma values + EXPECT_EQ(plssvm::calculate_gamma_value(gamma_value, matr), plssvm::real_type{ 1.0 }); +} + TEST(GammaType, get_gamma_string_real_type) { // create a gamma_type with a real_type value const plssvm::gamma_type gamma_value = plssvm::real_type{ 1.5 }; @@ -145,7 +158,7 @@ TEST(GammaType, get_gamma_string_gamma_coefficient_type_automatic) { // create a gamma_type with a real_type value const plssvm::gamma_type gamma_value = plssvm::gamma_coefficient_type::automatic; - // the std::variant must hold the real_type member + // the std::variant must hold the gamma_coefficient_type member ASSERT_TRUE(std::holds_alternative(gamma_value)); // check the variant string EXPECT_EQ(plssvm::get_gamma_string(gamma_value), std::string{ "\"1 / num_features\"" }); @@ -155,8 +168,18 @@ 
TEST(GammaType, get_gamma_string_gamma_coefficient_type_scale) { // create a gamma_type with a real_type value const plssvm::gamma_type gamma_value = plssvm::gamma_coefficient_type::scale; - // the std::variant must hold the real_type member + // the std::variant must hold the gamma_coefficient_type member ASSERT_TRUE(std::holds_alternative(gamma_value)); // check the variant string EXPECT_EQ(plssvm::get_gamma_string(gamma_value), std::string{ "\"1 / (num_features * variance(input_data))\"" }); } + +TEST(GammaType, get_gamma_string_invalid_gamma_coefficient_type) { + // create a gamma_type with a real_type value + const plssvm::gamma_type gamma_value = static_cast(2); + + // the std::variant must hold the gamma_coefficient_type member + ASSERT_TRUE(std::holds_alternative(gamma_value)); + // check the variant string + EXPECT_EQ(plssvm::get_gamma_string(gamma_value), std::string{ "unknown" }); +} diff --git a/tests/hpx_main.cpp b/tests/hpx_main.cpp index af087933b..ac84be4b4 100644 --- a/tests/hpx_main.cpp +++ b/tests/hpx_main.cpp @@ -10,13 +10,15 @@ * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast". 
*/ +#include "plssvm/environment.hpp" // plssvm::environment::scope_guard + #include "gtest/gtest.h" // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG},GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions #include // std::atexit // Workaround as HPX runtime not working properly with Google Test // Run the entire main function in HPX runtime -#include +#include "hpx/hpx_main.hpp" // silence GTest warnings/test errors @@ -48,6 +50,9 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); int main(int argc, char **argv) { + // initialize MPI environment only via the plssvm::scope_guard (by explicitly specifying NO backend) + [[maybe_unused]] plssvm::environment::scope_guard mpi_guard{ {} }; + ::testing::InitGoogleTest(&argc, argv); // prevent problems with fork() in the presence of multiple threads diff --git a/tests/kernel_functions.cpp b/tests/kernel_functions.cpp index f06a760b2..e2734aae5 100644 --- a/tests/kernel_functions.cpp +++ b/tests/kernel_functions.cpp @@ -61,7 +61,7 @@ class KernelFunctionVector : public ::testing::Test { std::vector> param_values_{ std::array{ plssvm::real_type{ 3.0 }, plssvm::real_type{ 0.05 }, plssvm::real_type{ 1.0 }, plssvm::real_type{ 1.0 } }, std::array{ plssvm::real_type{ 1.0 }, plssvm::real_type{ 0.0 }, plssvm::real_type{ 0.0 }, plssvm::real_type{ 1.0 } }, - std::array{ plssvm::real_type{ 4.0 }, plssvm::real_type{ -0.05 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }, + std::array{ plssvm::real_type{ 4.0 }, plssvm::real_type{ 0.01 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }, std::array{ plssvm::real_type{ 2.0 }, plssvm::real_type{ 0.025 }, plssvm::real_type{ -1.0 }, plssvm::real_type{ 0.5 } }, }; }; diff --git a/tests/main.cpp b/tests/main.cpp index 1e113de22..308d7f17d 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -15,6 +15,11 @@ #include // std::atexit +#ifdef __clang__ + #pragma clang diagnostic push + #pragma clang 
diagnostic ignored "-Wunused-variable" +#endif + // silence GTest warnings/test errors // generic C-SVM tests @@ -51,6 +56,10 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); // exception tests GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); +#ifdef __clang__ + #pragma clang diagnostic pop +#endif + static void ensure_finalization() { plssvm::environment::finalize(); } diff --git a/tests/classification_model.cpp b/tests/model/classification_model.cpp similarity index 100% rename from tests/classification_model.cpp rename to tests/model/classification_model.cpp diff --git a/tests/regression_model.cpp b/tests/model/regression_model.cpp similarity index 100% rename from tests/regression_model.cpp rename to tests/model/regression_model.cpp diff --git a/tests/mpi/communicator.cpp b/tests/mpi/communicator.cpp new file mode 100644 index 000000000..30078c8a5 --- /dev/null +++ b/tests/mpi/communicator.cpp @@ -0,0 +1,295 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for MPI communicator wrapper. + * @note Assumes only a single MPI rank, since more are **not** supported in our tests! 
+ */ + +#include "plssvm/mpi/communicator.hpp" + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::mpi_exception +#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix +#include "plssvm/shape.hpp" // plssvm::shape + +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/utility.hpp" // util::generate_random_matrix + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_COMM_WORLD, MPI_IDENT, MPI_Comm_compare, MPI_Comm_dup, MPI_Comm_free +#endif + +#include "gmock/gmock.h" // ::testing::HasSubstr +#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE + +#include // std::chrono::milliseconds +#include // std::size_t +#include // std::cout, std::endl +#include // std::string +#include // std::vector + +TEST(MPICommunicator, default_construct) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // load-balancing weights should be empty + EXPECT_FALSE(comm.get_load_balancing_weights().has_value()); +} + +TEST(MPICommunicator, construct_weights) { + const std::vector weights = { std::size_t{ 42 } }; + // create an MPI communicator with load-balancing weights + const plssvm::mpi::communicator comm{ weights }; + + // load-balancing weights should be set + ASSERT_TRUE(comm.get_load_balancing_weights().has_value()); + EXPECT_EQ(comm.get_load_balancing_weights().value(), weights); +} + +#if defined(PLSSVM_HAS_MPI_ENABLED) +TEST(MPICommunicator, construct_mpi_comm) { + // create an MPI communicator wrapping an MPI_Comm + const plssvm::mpi::communicator comm{ MPI_COMM_WORLD }; + + // load-balancing weights should be empty + EXPECT_FALSE(comm.get_load_balancing_weights().has_value()); + + // the wrapped MPI communicator should be equal to MPI_COMM_WORLD + int result{}; + MPI_Comm_compare(static_cast(comm), MPI_COMM_WORLD, &result); + EXPECT_EQ(result, MPI_IDENT); +} + +TEST(MPICommunicator, 
construct_mpi_comm_and_weights) { + const std::vector weights = { std::size_t{ 42 } }; + // create a MPI communicator with load-balancing weights + const plssvm::mpi::communicator comm{ MPI_COMM_WORLD, weights }; + + // load-balancing weights should be set + ASSERT_TRUE(comm.get_load_balancing_weights().has_value()); + EXPECT_EQ(comm.get_load_balancing_weights().value(), weights); + + // the wrapped MPI communicator should be equal to MPI_COMM_WORLD + int result{}; + MPI_Comm_compare(static_cast(comm), MPI_COMM_WORLD, &result); + EXPECT_EQ(result, MPI_IDENT); +} +#endif + +TEST(MPICommunicator, size) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // the size must be 1 since MPI is disabled + EXPECT_EQ(comm.size(), std::size_t{ 1 }); +} + +TEST(MPICommunicator, rank) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // the rank must be 0 since MPI is disabled + EXPECT_EQ(comm.rank(), std::size_t{ 0 }); +} + +TEST(MPICommunicator, main_rank) { + // always 0 + EXPECT_EQ(plssvm::mpi::communicator::main_rank(), std::size_t{ 0 }); +} + +TEST(MPICommunicator, is_mpi_enabled) { +#if defined(PLSSVM_HAS_MPI_ENABLED) + EXPECT_TRUE(plssvm::mpi::communicator::is_mpi_enabled()); +#else + EXPECT_FALSE(plssvm::mpi::communicator::is_mpi_enabled()); +#endif +} + +TEST(MPICommunicator, is_main_rank) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // always true since MPI is disabled + EXPECT_TRUE(comm.is_main_rank()); +} + +TEST(MPICommunicator, serialize) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // check if serialize can be called correctly + comm.serialize([&]() { std::cout << comm.rank() << std::endl; }); +} + +TEST(MPICommunicator, gather) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // since MPI is disabled, a call to 
gather must return a vector containing one value which is equal to the provided one + const std::vector res = comm.gather(42); + EXPECT_EQ(res.size(), std::size_t{ 1 }); + EXPECT_EQ(res.front(), 42); +} + +TEST(MPICommunicator, gather_string) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // the string to send + const std::string msg{ "Hello, World!" }; + + // since MPI is disabled, a call to gather must return a vector containing one value which is equal to the provided one + const std::vector res = comm.gather(msg); + EXPECT_EQ(res.size(), std::size_t{ 1 }); + EXPECT_EQ(res.front(), msg); +} + +TEST(MPICommunicator, gather_milli) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // since MPI is disabled, a call to gather must return a vector containing one value which is equal to the provided one + const std::vector res = comm.gather(std::chrono::milliseconds{ 13 }); + EXPECT_EQ(res.size(), std::size_t{ 1 }); + EXPECT_EQ(res.front(), std::chrono::milliseconds{ 13 }); +} + +TEST(MPICommunicator, allgather) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // since MPI is disabled, a call to allgather should behave like a call to gather + const std::vector res = comm.allgather(42); + EXPECT_EQ(res.size(), std::size_t{ 1 }); + EXPECT_EQ(res.front(), 42); +} + +TEST(MPICommunicator, allreduce_inplace) { + // create a default constructed MPI communicator + const plssvm::mpi::communicator comm{}; + + // since MPI is disabled, a call to allreduce_inplace must return a vector containing one value which is equal to the provided one + { + auto matr = util::generate_random_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ 2, 2 }); + const auto matr_correct = matr; + comm.allreduce_inplace(matr); + EXPECT_EQ(matr, matr_correct); + } + { + auto matr = util::generate_random_matrix>(plssvm::shape{ 4, 4 }, plssvm::shape{ 2, 2 }); + 
const auto matr_correct = matr; + comm.allreduce_inplace(matr); + EXPECT_EQ(matr, matr_correct); + } +} + +TEST(MPICommunicator, set_load_balancing_weights) { + const std::vector weights = { std::size_t{ 42 } }; + // create default constructed MPI communicator + plssvm::mpi::communicator comm{}; + + // should have no load-balancing weights + ASSERT_FALSE(comm.get_load_balancing_weights().has_value()); + + // set new load-balancing weights + comm.set_load_balancing_weights(weights); + + // now, there should be load-balancing weights + ASSERT_TRUE(comm.get_load_balancing_weights().has_value()); + EXPECT_EQ(comm.get_load_balancing_weights(), weights); +} + +TEST(MPICommunicator, get_load_balancing_weights) { + const std::vector weights = { std::size_t{ 42 } }; + // create default constructed MPI communicator + plssvm::mpi::communicator comm{}; + + // should have no load-balancing weights + ASSERT_FALSE(comm.get_load_balancing_weights().has_value()); + + // set new load-balancing weights + comm.set_load_balancing_weights(weights); + + // now, there should be load-balancing weights + EXPECT_TRUE(comm.get_load_balancing_weights().has_value()); +} + +TEST(MPICommunicator, equal) { + // create two default constructed MPI communicators + const plssvm::mpi::communicator comm1{}; + const plssvm::mpi::communicator comm2{}; + + // since MPI is disabled, two communicator should always be equal + EXPECT_TRUE(comm1 == comm2); + +#if defined(PLSSVM_HAS_MPI_ENABLED) + const plssvm::mpi::communicator comm3{ MPI_COMM_WORLD }; + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm4{ duplicated_mpi_comm }; + + EXPECT_TRUE(comm1 == comm3); + EXPECT_FALSE(comm1 == comm4); + EXPECT_FALSE(comm3 == comm4); + + MPI_Comm_free(&duplicated_mpi_comm); +#endif +} + +TEST(MPICommunicator, unequal) { + // create two default constructed MPI communicators + const plssvm::mpi::communicator 
comm1{}; + const plssvm::mpi::communicator comm2{}; + + // since MPI is disabled, two communicator should never be unequal + EXPECT_FALSE(comm1 != comm2); + +#if defined(PLSSVM_HAS_MPI_ENABLED) + const plssvm::mpi::communicator comm3{ MPI_COMM_WORLD }; + // create a duplicated communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm4{ duplicated_mpi_comm }; + + EXPECT_FALSE(comm1 != comm3); + EXPECT_TRUE(comm1 != comm4); + EXPECT_TRUE(comm3 != comm4); + + MPI_Comm_free(&duplicated_mpi_comm); +#endif +} + +TEST(MPICommunicatorDeathTest, construct_too_few_weights) { + // since MPI is not enabled, we can only pass exactly ONE weight value + EXPECT_THROW_WHAT_MATCHER(plssvm::mpi::communicator{ std::vector{} }, plssvm::mpi_exception, ::testing::HasSubstr("The number of load balancing weights (0) must match the number of MPI ranks (1)!")); +} + +TEST(MPICommunicatorDeathTest, construct_too_many_weights) { + // since MPI is not enabled, we can only pass exactly ONE weight value + EXPECT_THROW_WHAT_MATCHER((plssvm::mpi::communicator{ std::vector{ std::size_t{ 1 }, std::size_t{ 2 } } }), + plssvm::mpi_exception, + ::testing::HasSubstr("The number of load balancing weights (2) must match the number of MPI ranks (1)!")); +} + +TEST(MPICommunicatorDeathTest, set_too_few_load_balancing_weights) { + // create default constructed MPI communicator + plssvm::mpi::communicator comm{}; + + // since MPI is not enabled, we can only pass exactly ONE weight value + EXPECT_THROW_WHAT_MATCHER(comm.set_load_balancing_weights({}), plssvm::mpi_exception, ::testing::HasSubstr("The number of load balancing weights (0) must match the number of MPI ranks (1)!")); +} + +TEST(MPICommunicatorDeathTest, set_too_many_load_balancing_weights) { + // create default constructed MPI communicator + plssvm::mpi::communicator comm{}; + + // since MPI is not enabled, we can only pass exactly ONE weight value + 
EXPECT_THROW_WHAT_MATCHER((comm.set_load_balancing_weights(std::vector{ std::size_t{ 1 }, std::size_t{ 2 } })), + plssvm::mpi_exception, + ::testing::HasSubstr("The number of load balancing weights (2) must match the number of MPI ranks (1)!")); +} diff --git a/tests/mpi/detail/information.cpp b/tests/mpi/detail/information.cpp new file mode 100644 index 000000000..9e7799dfa --- /dev/null +++ b/tests/mpi/detail/information.cpp @@ -0,0 +1,61 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Very basic tests for MPI information functions. + * @note Assumes only a single MPI rank, since more are **not** supported in our tests! + */ + +#include "plssvm/mpi/detail/information.hpp" + +#include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/solver_types.hpp" // plssvm::solver_type +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "tests/utility.hpp" // util::redirect_output + +#include "gtest/gtest.h" // ::testing::Test, TEST, EXPECT_FALSE + +#include // std::cout +#include // std::string +#include // std::vector + +class MPIInformation : public ::testing::Test, + public util::redirect_output<&std::cout> { }; + +TEST_F(MPIInformation, gather_and_print_solver_information) { + // construct an MPI communicator + const plssvm::mpi::communicator comm{}; + + // call the print function + plssvm::mpi::detail::gather_and_print_solver_information(comm, plssvm::solver_type::cg_explicit); + + // the capture may not be empty + EXPECT_FALSE(this->get_capture().empty()); +} + +TEST_F(MPIInformation, gather_and_print_csvm_information_with_device_names) { + // construct an MPI communicator + const 
plssvm::mpi::communicator comm{}; + + // call the print function + plssvm::mpi::detail::gather_and_print_csvm_information(comm, plssvm::backend_type::cuda, plssvm::target_platform::gpu_nvidia, std::vector{ "GPU1", "GPU2" }); + + // the capture may not be empty + EXPECT_FALSE(this->get_capture().empty()); +} + +TEST_F(MPIInformation, gather_and_print_csvm_information) { + // construct an MPI communicator + const plssvm::mpi::communicator comm{}; + + // call the print function + plssvm::mpi::detail::gather_and_print_csvm_information(comm, plssvm::backend_type::cuda, plssvm::target_platform::gpu_nvidia); + + // the capture may not be empty + EXPECT_FALSE(this->get_capture().empty()); +} diff --git a/tests/mpi/detail/mpi_datatype.cpp b/tests/mpi/detail/mpi_datatype.cpp new file mode 100644 index 000000000..9ed41b4cc --- /dev/null +++ b/tests/mpi/detail/mpi_datatype.cpp @@ -0,0 +1,60 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for MPI data type mapper functions. + * @note Assumes only a single MPI rank, since more are **not** supported in our tests! 
+ */ + +#include "plssvm/mpi/detail/mpi_datatype.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_FALSE, EXPECT_DEATH + +#include // std::complex + +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TEST(MPIDataTypes, mpi_datatype) { + // check type conversions + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_C_BOOL); + + // character types + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_CHAR); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_SIGNED_CHAR); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_UNSIGNED_CHAR); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_WCHAR); + + // integer types + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_SHORT); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_UNSIGNED_SHORT); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_INT); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_UNSIGNED); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_LONG); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_UNSIGNED_LONG); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_LONG_LONG); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_UNSIGNED_LONG_LONG); + + // floating point types + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_FLOAT); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_DOUBLE); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_LONG_DOUBLE); + + // complex types + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype>(), MPI_C_COMPLEX); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype>(), MPI_C_DOUBLE_COMPLEX); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype>(), MPI_C_LONG_DOUBLE_COMPLEX); +} + +enum class dummy1 : int {}; +enum class dummy2 : char {}; + +TEST(MPIDataTypes, mpi_datatype_from_enum) { + // check type conversions from enum's underlying type + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_INT); + EXPECT_EQ(plssvm::mpi::detail::mpi_datatype(), MPI_CHAR); +} + +#endif diff --git a/tests/mpi/detail/utility.cpp b/tests/mpi/detail/utility.cpp new file mode 100644 index 
000000000..278b9ec1f --- /dev/null +++ b/tests/mpi/detail/utility.cpp @@ -0,0 +1,41 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for MPI utility functions. + * @note Assumes only a single MPI rank, since more are **not** supported in our tests! + */ + +#include "plssvm/mpi/detail/utility.hpp" + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::mpi_exception + +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_SUCCESS, MPI_ERR_COMM +#endif + +#include "gtest/gtest.h" // TEST, EXPECT_FALSE, EXPECT_THROW, EXPECT_NO_THROW + +#include // std::string + +TEST(MPIUtility, mpi_error_check) { + // test error check macro +#if defined(PLSSVM_HAS_MPI_ENABLED) + // if MPI is enabled, MPI_SUCCESS may never throw + EXPECT_NO_THROW(plssvm::mpi::detail::mpi_error_check(MPI_SUCCESS)); + + // if MPI is enabled, MPI_ERR_COMM must throw + EXPECT_THROW(plssvm::mpi::detail::mpi_error_check(MPI_ERR_COMM), plssvm::mpi_exception); +#else + // if MPI is disabled, may never throw + EXPECT_NO_THROW(plssvm::mpi::detail::mpi_error_check(1)); +#endif +} + +TEST(MPIUtility, node_name) { + // the MPI node name may not be empty + EXPECT_FALSE(plssvm::mpi::detail::node_name().empty()); +} diff --git a/tests/mpi/detail/version.cpp b/tests/mpi/detail/version.cpp new file mode 100644 index 000000000..59f99e828 --- /dev/null +++ b/tests/mpi/detail/version.cpp @@ -0,0 +1,26 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ * + * @brief Tests for MPI version functions. + * @note Assumes only a single MPI rank, since more are **not** supported in our tests! + */ + +#include "plssvm/mpi/detail/version.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_FALSE + +#include // std::string + +TEST(MPIVersion, mpi_library_version) { + // the MPI library version may not be empty + EXPECT_FALSE(plssvm::mpi::detail::mpi_library_version().empty()); +} + +TEST(MPIVersion, mpi_version) { + // the MPI version may not be empty + EXPECT_FALSE(plssvm::mpi::detail::mpi_version().empty()); +} diff --git a/tests/mpi/environment.cpp b/tests/mpi/environment.cpp new file mode 100644 index 000000000..812f1f5ab --- /dev/null +++ b/tests/mpi/environment.cpp @@ -0,0 +1,24 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for MPI environment wrapper functions. + * @note Assumes only a single MPI rank, since more are **not** supported in our tests! 
+ */ + +#include "plssvm/mpi/environment.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_FALSE, EXPECT_DEATH + +TEST(MPIEnvironment, is_executed_via_mpirun) { + // since we do not support mpirun ctest, the function must return false + EXPECT_FALSE(plssvm::mpi::is_executed_via_mpirun()); +} + +TEST(MPIEnvironmentDeathTest, abort_world) { + // test whether the abort function fires correctly + EXPECT_DEATH(plssvm::mpi::abort_world(), ""); +} diff --git a/tests/parameter.cpp b/tests/parameter.cpp index 7db96c6bf..80de6eec7 100644 --- a/tests/parameter.cpp +++ b/tests/parameter.cpp @@ -40,13 +40,13 @@ TEST(Parameter, default_construct) { TEST(Parameter, construct) { // construct a parameter set explicitly overwriting the default values - const plssvm::parameter param{ plssvm::kernel_function_type::polynomial, 1, plssvm::real_type{ -1.0 }, plssvm::real_type{ 2.5 }, plssvm::real_type{ 0.05 } }; + const plssvm::parameter param{ plssvm::kernel_function_type::polynomial, 1, plssvm::real_type{ 0.1 }, plssvm::real_type{ 2.5 }, plssvm::real_type{ 0.05 } }; // test default values EXPECT_EQ(param.kernel_type, plssvm::kernel_function_type::polynomial); EXPECT_EQ(param.degree, 1); ASSERT_TRUE(std::holds_alternative(param.gamma)); - EXPECT_FLOATING_POINT_EQ(std::get(param.gamma), plssvm::real_type{ -1.0 }); + EXPECT_FLOATING_POINT_EQ(std::get(param.gamma), plssvm::real_type{ 0.1 }); EXPECT_FLOATING_POINT_EQ(param.coef0, plssvm::real_type{ 2.5 }); EXPECT_FLOATING_POINT_EQ(param.cost, plssvm::real_type{ 0.05 }); } @@ -75,14 +75,14 @@ TEST(Parameter, construct_named_args) { const plssvm::parameter param{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial, plssvm::cost = 0.05, - plssvm::gamma = -1.0 + plssvm::gamma = 0.1 }; // test default values EXPECT_EQ(param.kernel_type, plssvm::kernel_function_type::polynomial); EXPECT_EQ(param.degree, 3); ASSERT_TRUE(std::holds_alternative(param.gamma)); - EXPECT_FLOATING_POINT_EQ(std::get(param.gamma), plssvm::real_type{ -1.0 }); + 
EXPECT_FLOATING_POINT_EQ(std::get(param.gamma), plssvm::real_type{ 0.1 }); EXPECT_FLOATING_POINT_EQ(param.coef0, plssvm::real_type{ 0.0 }); EXPECT_FLOATING_POINT_EQ(param.cost, plssvm::real_type{ 0.05 }); } @@ -92,7 +92,7 @@ TEST(Parameter, construct_parameter_and_named_args) { const plssvm::parameter param_base{ plssvm::kernel_type = plssvm::kernel_function_type::laplacian, plssvm::cost = 0.05, - plssvm::gamma = -1.0 + plssvm::gamma = 0.1 }; // create new parameter set using a previous parameter set together with some named parameters @@ -106,11 +106,38 @@ TEST(Parameter, construct_parameter_and_named_args) { EXPECT_EQ(param.kernel_type, plssvm::kernel_function_type::rbf); EXPECT_EQ(param.degree, 3); ASSERT_TRUE(std::holds_alternative(param.gamma)); - EXPECT_FLOATING_POINT_EQ(std::get(param.gamma), plssvm::real_type{ -1.0 }); + EXPECT_FLOATING_POINT_EQ(std::get(param.gamma), plssvm::real_type{ 0.1 }); EXPECT_FLOATING_POINT_EQ(param.coef0, plssvm::real_type{ 0.0 }); EXPECT_FLOATING_POINT_EQ(param.cost, plssvm::real_type{ 0.05 }); } +TEST(Parameter, construct_invalid_kernel_type) { + EXPECT_THROW_WHAT(plssvm::parameter{ plssvm::kernel_type = static_cast(6) }, + plssvm::invalid_parameter_exception, + "Invalid kernel function with value 6 given!"); +} + +TEST(Parameter, construct_invalid_degree) { + EXPECT_THROW_WHAT((plssvm::parameter{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial, plssvm::degree = -1 }), + plssvm::invalid_parameter_exception, + "degree must be non-negative, but is -1!"); +} + +TEST(Parameter, construct_invalid_gamma) { + EXPECT_THROW_WHAT(plssvm::parameter{ plssvm::gamma = plssvm::real_type{ -0.1 } }, + plssvm::invalid_parameter_exception, + "gamma must be non-negative, but is -0.1!"); +} + +TEST(Parameter, construct_invalid_cost) { + EXPECT_THROW_WHAT(plssvm::parameter{ plssvm::cost = plssvm::real_type{ 0.0 } }, + plssvm::invalid_parameter_exception, + "cost must be strictly-positive, but is 0!"); + 
EXPECT_THROW_WHAT(plssvm::parameter{ plssvm::cost = plssvm::real_type{ -0.1 } }, + plssvm::invalid_parameter_exception, + "cost must be strictly-positive, but is -0.1!"); +} + TEST(Parameter, equal) { // test whether different parameter sets are equal, i.e., all member variables have the same value const plssvm::parameter params1{ plssvm::kernel_function_type::rbf, 3, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; @@ -163,14 +190,12 @@ TEST(Parameter, equivalent_member_function) { const plssvm::parameter params2{ plssvm::kernel_function_type::rbf, 3, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params3{ plssvm::kernel_function_type::linear, 3, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params4{ plssvm::kernel_function_type::rbf, 2, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; - const plssvm::parameter params5{ plssvm::kernel_function_type::linear, 2, plssvm::real_type{ -0.02 }, plssvm::real_type{ 0.5 }, plssvm::real_type{ 1.0 } }; + const plssvm::parameter params5{ plssvm::kernel_function_type::linear, 2, plssvm::real_type{ 0.04 }, plssvm::real_type{ 0.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params6{ plssvm::kernel_function_type::polynomial, 2, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params7{ plssvm::kernel_function_type::polynomial, 2, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params8{ plssvm::kernel_function_type::sigmoid, 0, plssvm::real_type{ 0.2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.2 } }; const plssvm::parameter params9{ plssvm::kernel_function_type::laplacian, 0, plssvm::real_type{ 0.1 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 0.1 } }; const plssvm::parameter params10{ 
plssvm::kernel_function_type::chi_squared, 1, plssvm::real_type{ 0.02 }, plssvm::real_type{ 0.5 }, plssvm::real_type{ 1.0 } }; - const plssvm::parameter params11{ static_cast(6), 3, plssvm::real_type{ 0.2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.1 } }; - const plssvm::parameter params12{ static_cast(6), 3, plssvm::real_type{ 0.2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.1 } }; // test EXPECT_TRUE(params1.equivalent(params2)); @@ -181,9 +206,8 @@ TEST(Parameter, equivalent_member_function) { EXPECT_TRUE(params6.equivalent(params7)); EXPECT_FALSE(params6.equivalent(params8)); EXPECT_FALSE(params8.equivalent(params9)); + EXPECT_TRUE(params8.equivalent(params8)); EXPECT_FALSE(params4.equivalent(params10)); - EXPECT_FALSE(params6.equivalent(params11)); - EXPECT_FALSE(params8.equivalent(params12)); } TEST(Parameter, equivalent_member_function_default_constructed) { @@ -201,14 +225,12 @@ TEST(Parameter, equivalent_free_function) { const plssvm::parameter params2{ plssvm::kernel_function_type::rbf, 3, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params3{ plssvm::kernel_function_type::linear, 3, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params4{ plssvm::kernel_function_type::rbf, 2, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; - const plssvm::parameter params5{ plssvm::kernel_function_type::linear, 2, plssvm::real_type{ -0.02 }, plssvm::real_type{ 0.5 }, plssvm::real_type{ 1.0 } }; + const plssvm::parameter params5{ plssvm::kernel_function_type::linear, 2, plssvm::real_type{ 0.04 }, plssvm::real_type{ 0.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params6{ plssvm::kernel_function_type::polynomial, 2, plssvm::real_type{ 0.02 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params7{ plssvm::kernel_function_type::polynomial, 2, plssvm::real_type{ 0.02 }, 
plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params8{ plssvm::kernel_function_type::sigmoid, 0, plssvm::real_type{ 0.2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.2 } }; const plssvm::parameter params9{ plssvm::kernel_function_type::laplacian, 0, plssvm::real_type{ 0.1 }, plssvm::real_type{ 1.5 }, plssvm::real_type{ 1.0 } }; const plssvm::parameter params10{ plssvm::kernel_function_type::chi_squared, 1, plssvm::real_type{ 0.02 }, plssvm::real_type{ 0.5 }, plssvm::real_type{ 0.1 } }; - const plssvm::parameter params11{ static_cast(6), 3, plssvm::real_type{ 0.2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.1 } }; - const plssvm::parameter params12{ static_cast(6), 3, plssvm::real_type{ 0.2 }, plssvm::real_type{ -1.5 }, plssvm::real_type{ 0.1 } }; // test EXPECT_TRUE(plssvm::equivalent(params1, params2)); @@ -219,9 +241,8 @@ TEST(Parameter, equivalent_free_function) { EXPECT_TRUE(plssvm::equivalent(params6, params7)); EXPECT_FALSE(plssvm::equivalent(params6, params8)); EXPECT_FALSE(plssvm::equivalent(params8, params9)); + EXPECT_TRUE(plssvm::equivalent(params8, params8)); EXPECT_FALSE(plssvm::equivalent(params4, params10)); - EXPECT_FALSE(plssvm::equivalent(params6, params11)); - EXPECT_FALSE(plssvm::equivalent(params8, params12)); } TEST(Parameter, equivalent_free_function_default_constructed) { @@ -235,10 +256,10 @@ TEST(Parameter, equivalent_free_function_default_constructed) { TEST(Parameter, to_string) { // check conversions to std::string - const plssvm::parameter param{ plssvm::kernel_function_type::linear, 3, plssvm::real_type{ 0.0 }, plssvm::real_type{ 0.0 }, plssvm::real_type{ 1.0 } }; + const plssvm::parameter param{ plssvm::kernel_function_type::linear, 3, plssvm::real_type{ 0.1 }, plssvm::real_type{ 0.0 }, plssvm::real_type{ 1.0 } }; EXPECT_CONVERSION_TO_STRING(param, fmt::format("kernel_type linear\n" "degree 3\n" - "gamma 0\n" + "gamma 0.1\n" "coef0 0\n" "cost 1\n" "real_type {}\n", diff --git 
a/tests/regression_report.cpp b/tests/regression_report.cpp new file mode 100644 index 000000000..c9451dcc7 --- /dev/null +++ b/tests/regression_report.cpp @@ -0,0 +1,176 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for functions related to the regression report. + */ + +#include "plssvm/regression_report.hpp" + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::regression_report_exception + +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_CONVERSION_TO_STRING, EXPECT_FLOATING_POINT_NEAR, EXPECT_FLOATING_POINT_NEAR_EPS +#include "tests/utility.hpp" // util::redirect_output + +#include "gmock/gmock.h" // EXPECT_THAT, ::testing::ContainsRegex +#include "gtest/gtest.h" // TEST, TEST_F, ::testing::Test + +#include // std::cout +#include // std::string +#include // std::vector + +//*************************************************************************************************************************************// +// metrics // +//*************************************************************************************************************************************// + +TEST(RegressionReportMetrics, construct_metric) { + // construct a metric object + const plssvm::regression_report::metric m{ 0.1, 0.2, 0.3, 0.4, 0.5 }; + + // check if values are set correctly + EXPECT_FLOATING_POINT_NEAR(m.explained_variance_score, 0.1); + EXPECT_FLOATING_POINT_NEAR(m.mean_absolute_error, 0.2); + EXPECT_FLOATING_POINT_NEAR(m.mean_squared_error, 0.3); + EXPECT_FLOATING_POINT_NEAR(m.r2_score, 0.4); + EXPECT_FLOATING_POINT_NEAR(m.squared_correlation_coefficient, 0.5); +} + +TEST(RegressionReportMetrics, output_metric) { + // construct a metric object + 
EXPECT_CONVERSION_TO_STRING((plssvm::regression_report::metric{ 0.1, 0.2, 0.3, 0.4, 0.5 }), + "Explained variance score: 0.1\n" + "Mean absolute error: 0.2\n" + "Mean squared error: 0.3\n" + "R^2 score: 0.4\n" + "Squared correlation coefficient: 0.5"); +} + +class RegressionReport : public ::testing::Test, + public util::redirect_output<> { + protected: + /** + * @brief Return the correct labels to calculate the regression report with. + * @return the correct labels (`[[nodiscard]]`) + */ + [[nodiscard]] const std::vector &get_correct_label() const noexcept { + return correct_label_; + } + + /** + * @brief Return the predicted labels to calculate the regression report with. + * @return the predicted labels (`[[nodiscard]]`) + */ + [[nodiscard]] const std::vector &get_predicted_label() const noexcept { + return predicted_label_; + } + + private: + /// The correct class labels. + std::vector correct_label_ = { 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0 }; + /// The predicted class labels. 
+ std::vector predicted_label_ = { 0.1, 0.4, 0.7, 0.8, 1.1, 1.2, 1.5, 1.6, 1.7, 2.0 }; +}; + +TEST_F(RegressionReport, construct) { + // construct a regression report + const plssvm::regression_report report{ this->get_correct_label(), this->get_predicted_label() }; + + // check if values are set correctly + const plssvm::regression_report::metric m = report.loss(); + EXPECT_FLOATING_POINT_NEAR_EPS(m.explained_variance_score, 0.9851515151515151, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.mean_absolute_error, 0.05, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.mean_squared_error, 0.005, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.r2_score, 0.9848484848484849, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.squared_correlation_coefficient, 0.9852899678673179, 1e6); +} + +TEST_F(RegressionReport, construct_perfect_prediction) { + // construct a regression report + const plssvm::regression_report report{ this->get_correct_label(), this->get_correct_label() }; + + // check if values are set correctly + const plssvm::regression_report::metric m = report.loss(); + EXPECT_FLOATING_POINT_NEAR(m.explained_variance_score, 1.0); + EXPECT_FLOATING_POINT_NEAR(m.mean_absolute_error, 0.0); + EXPECT_FLOATING_POINT_NEAR(m.mean_squared_error, 0.0); + EXPECT_FLOATING_POINT_NEAR(m.r2_score, 1.0); + EXPECT_FLOATING_POINT_NEAR(m.squared_correlation_coefficient, 1.0); +} + +TEST_F(RegressionReport, construct_force_finite) { + // construct a regression report + const plssvm::regression_report report{ this->get_correct_label(), this->get_predicted_label(), plssvm::regression_report::force_finite = true }; + + // check if values are set correctly + const plssvm::regression_report::metric m = report.loss(); + EXPECT_FLOATING_POINT_NEAR_EPS(m.explained_variance_score, 0.9851515151515151, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.mean_absolute_error, 0.05, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.mean_squared_error, 0.005, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.r2_score, 0.9848484848484849, 1e6); + 
EXPECT_FLOATING_POINT_NEAR_EPS(m.squared_correlation_coefficient, 0.9852899678673179, 1e6); +} + +TEST_F(RegressionReport, construct_force_finite_perfect_prediction) { + // construct a regression report + const plssvm::regression_report report{ this->get_correct_label(), this->get_correct_label(), plssvm::regression_report::force_finite = true }; + + // check if values are set correctly + const plssvm::regression_report::metric m = report.loss(); + EXPECT_FLOATING_POINT_NEAR(m.explained_variance_score, 1.0); + EXPECT_FLOATING_POINT_NEAR(m.mean_absolute_error, 0.0); + EXPECT_FLOATING_POINT_NEAR(m.mean_squared_error, 0.0); + EXPECT_FLOATING_POINT_NEAR(m.r2_score, 1.0); + EXPECT_FLOATING_POINT_NEAR(m.squared_correlation_coefficient, 1.0); +} + +TEST_F(RegressionReport, construct_empty_correct_label) { + // the correct labels vector must not be empty + EXPECT_THROW_WHAT((plssvm::regression_report{ std::vector{}, this->get_predicted_label() }), + plssvm::regression_report_exception, + "The correct labels list must not be empty!"); +} + +TEST_F(RegressionReport, construct_empty_predicted_label) { + // the predicted labels vector must not be empty + EXPECT_THROW_WHAT((plssvm::regression_report{ this->get_correct_label(), std::vector{} }), + plssvm::regression_report_exception, + "The predicted labels list must not be empty!"); +} + +TEST_F(RegressionReport, construct_label_size_mismatch) { + // constructing a regression report with different number of correct and predicted labels must throw + EXPECT_THROW_WHAT((plssvm::regression_report{ std::vector{ 0, 0, 0 }, std::vector{ 0, 0 } }), + plssvm::regression_report_exception, + "The number of correct labels (3) and predicted labels (2) must be the same!"); +} + +TEST_F(RegressionReport, loss) { + // construct a regression report + const plssvm::regression_report report{ this->get_correct_label(), this->get_predicted_label() }; + + // check loss values + const plssvm::regression_report::metric m = report.loss(); + 
EXPECT_FLOATING_POINT_NEAR_EPS(m.explained_variance_score, 0.9851515151515151, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.mean_absolute_error, 0.05, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.mean_squared_error, 0.005, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.r2_score, 0.9848484848484849, 1e6); + EXPECT_FLOATING_POINT_NEAR_EPS(m.squared_correlation_coefficient, 0.9852899678673179, 1e6); +} + +TEST_F(RegressionReport, regression_report) { + // construct a regression report + const plssvm::regression_report report{ this->get_correct_label(), this->get_predicted_label() }; + std::cout << report; + + // check output + const std::string correct_output = + "Explained variance score: .*\n" + "Mean absolute error: .*\n" + "Mean squared error: .*\n" + "R\\^2 score: .*\n" + "Squared correlation coefficient: .*"; + EXPECT_THAT(this->get_capture(), ::testing::ContainsRegex(correct_output)); +} diff --git a/tests/svm/csvc.cpp b/tests/svm/csvc.cpp index 67a7467b1..0a505d094 100644 --- a/tests/svm/csvc.cpp +++ b/tests/svm/csvc.cpp @@ -23,14 +23,18 @@ #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/solver_types.hpp" // plssvm::solver_type -#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_INCLUSIVE_RANGE +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_THROW_WHAT_MATCHER, EXPECT_INCLUSIVE_RANGE #include "tests/naming.hpp" // naming::parameter_definition_to_name #include "tests/svm/mock_csvc.hpp" // mock_csvc #include "tests/types_to_test.hpp" // util::classification_label_type_classification_type_gtest #include "tests/utility.hpp" // util::{redirect_output, temporary_file, instantiate_template_file, get_num_classes, calculate_number_of_classifiers, // generate_random_matrix, get_correct_data_file_labels} -#include "gmock/gmock.h" // EXPECT_CALL, EXPECT_THAT, ::testing::{An, Between, Return, HasSubstr} +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_COMM_WORLD, MPI_Comm_dup, MPI_Comm_free +#endif + +#include 
"gmock/gmock.h" // EXPECT_CALL, EXPECT_THAT, ::testing::{An, Between, Return, HasSubstr, ContainsRegex} #include "gtest/gtest.h" // TEST, TYPED_TEST, TYPED_TEST_SUITE, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE, EXPECT_THAT, #include // std::size_t @@ -447,6 +451,65 @@ TYPED_TEST(BaseCSVCFit, fit_named_parameters_invalid_max_iter) { "max_iter must be greater than 0, but is 0!"); } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(BaseCSVCFit, fit_communicator_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::solver_type solver = TestFixture::fixture_solver; + constexpr plssvm::kernel_function_type kernel = TestFixture::fixture_kernel; + constexpr plssvm::classification_type classification = TestFixture::fixture_classification; + + // create C-SVC: must be done using the mock class since the csvc base class is pure virtual + const mock_csvc csvc{ plssvm::parameter{ plssvm::kernel_type = kernel } }; + + // since an exception should be triggered, the mocked function should never be called + // clang-format off + EXPECT_CALL(csvc, get_device_memory()).Times(0); + EXPECT_CALL(csvc, num_available_devices()).Times(0); +#if defined(PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE) + EXPECT_CALL(csvc, get_max_mem_alloc_size()).Times(0); +#endif + EXPECT_CALL(csvc, assemble_kernel_matrix( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An())) + .Times(0); + EXPECT_CALL(csvc, blas_level_3( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An(), + ::testing::An &>())) + .Times(0); + // clang-format on + + // create mismatching MPI communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + plssvm::classification_data_set training_data{ comm, this->get_data_filename() }; + if constexpr (kernel == plssvm::kernel_function_type::chi_squared) 
{ + // chi-squared is well-defined for non-negative values only + if (training_data.labels().has_value()) { + training_data = plssvm::classification_data_set{ comm, util::matrix_abs(training_data.data()), *training_data.labels() }; + } + } + + // calling the function with mismatching MPI communicators should throw + EXPECT_THROW_WHAT((std::ignore = csvc.fit(training_data, plssvm::solver = solver, plssvm::classification = classification)), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVC and data set must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(BaseCSVCFit, fit_no_label) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::solver_type solver = TestFixture::fixture_solver; @@ -493,6 +556,114 @@ TYPED_TEST(BaseCSVCFit, fit_no_label) { "No labels given for training! Maybe the data is only usable for prediction?"); } +TYPED_TEST(BaseCSVCFit, fit_out_of_resources) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::solver_type solver = TestFixture::fixture_solver; + constexpr plssvm::kernel_function_type kernel = TestFixture::fixture_kernel; + constexpr plssvm::classification_type classification = TestFixture::fixture_classification; + + // this test is only really applicable for the automatic solver type + if constexpr (solver == plssvm::solver_type::automatic) { + // create C-SVC: must be done using the mock class since the csvc base class is pure virtual + const mock_csvc csvc{ plssvm::parameter{ plssvm::kernel_type = kernel } }; + + // override on call + using namespace plssvm::detail::literals; + ON_CALL(csvc, get_device_memory()).WillByDefault(::testing::Return(std::vector{ 512_MiB + 1_KiB, 512_MiB + 1_KiB })); + + // clang-format off + EXPECT_CALL(csvc, get_device_memory()).Times(1); + EXPECT_CALL(csvc, num_available_devices()).Times(1); +#if defined(PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE) + EXPECT_CALL(csvc, 
get_max_mem_alloc_size()).Times(1); +#endif + EXPECT_CALL(csvc, assemble_kernel_matrix( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An())) + .Times(0); + EXPECT_CALL(csvc, blas_level_3( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An(), + ::testing::An &>())) + .Times(0); + // clang-format on + + // create data set + plssvm::classification_data_set training_data{ this->get_data_filename() }; + if constexpr (kernel == plssvm::kernel_function_type::chi_squared) { + // chi-squared is well-defined for non-negative values only + if (training_data.labels().has_value()) { + training_data = plssvm::classification_data_set{ util::matrix_abs(training_data.data()), *training_data.labels() }; + } + } + + // call function -> should throw since we are out of resources + EXPECT_THROW_WHAT_MATCHER((std::ignore = csvc.fit(training_data, plssvm::solver = solver, plssvm::classification = classification)), + plssvm::kernel_launch_resources, + ::testing::ContainsRegex("Not enough device memory available on device.* even for the cg_implicit solver!")); + } +} + +TYPED_TEST(BaseCSVCFit, fit_device_memory_too_small) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::solver_type solver = TestFixture::fixture_solver; + constexpr plssvm::kernel_function_type kernel = TestFixture::fixture_kernel; + constexpr plssvm::classification_type classification = TestFixture::fixture_classification; + + // this test is only really applicable for the automatic solver type + if constexpr (solver == plssvm::solver_type::automatic) { + // create C-SVC: must be done using the mock class since the csvc base class is pure virtual + const mock_csvc csvc{ plssvm::parameter{ plssvm::kernel_type = kernel } }; + + // override on call + using namespace plssvm::detail::literals; + ON_CALL(csvc, get_device_memory()).WillByDefault(::testing::Return(std::vector{ 1_KiB, 1_KiB })); + + // 
clang-format off + EXPECT_CALL(csvc, get_device_memory()).Times(1); + EXPECT_CALL(csvc, num_available_devices()).Times(0); +#if defined(PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE) + EXPECT_CALL(csvc, get_max_mem_alloc_size()).Times(0); +#endif + EXPECT_CALL(csvc, assemble_kernel_matrix( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An())) + .Times(0); + EXPECT_CALL(csvc, blas_level_3( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An(), + ::testing::An &>())) + .Times(0); + // clang-format on + + // create data set + plssvm::classification_data_set training_data{ this->get_data_filename() }; + if constexpr (kernel == plssvm::kernel_function_type::chi_squared) { + // chi-squared is well-defined for non-negative values only + if (training_data.labels().has_value()) { + training_data = plssvm::classification_data_set{ util::matrix_abs(training_data.data()), *training_data.labels() }; + } + } + + // call function -> should throw since we are out of resources + EXPECT_THROW_WHAT((std::ignore = csvc.fit(training_data, plssvm::solver = solver, plssvm::classification = classification)), + plssvm::kernel_launch_resources, + "At least 512.00 MiB of memory must be available, but available are only 1.00 KiB!"); + } +} + template class BaseCSVCPredict : public BaseCSVCMemberBase { }; @@ -558,6 +729,49 @@ TYPED_TEST(BaseCSVCPredict, predict_num_feature_mismatch) { "Number of features per data point (2) must match the number of features per support vector of the provided model (4)!"); } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(BaseCSVCPredict, predict_communicator_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create C-SVC: must be done using the mock class since the csvc base class is pure virtual + const mock_csvc csvc{}; + + // mock the predict_values function -> since an exception should be triggered, the mocked function should never be called + 
// clang-format off + EXPECT_CALL(csvc, predict_values( + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>())).Times(0); + // clang-format on + + // create mismatching MPI communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set and previously learned model + const plssvm::classification_data_set data_to_predict{ this->get_data_filename() }; + const plssvm::classification_data_set data_to_predict_wrong_comm{ comm, this->get_data_filename() }; + const plssvm::classification_model learned_model{ this->get_model_filename() }; + const plssvm::classification_model learned_model_wrong_comm{ comm, this->get_model_filename() }; + + // calling the function with mismatching MPI communicators should throw + EXPECT_THROW_WHAT(std::ignore = csvc.predict(learned_model_wrong_comm, data_to_predict), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVC and model must be identical!"); + EXPECT_THROW_WHAT(std::ignore = csvc.predict(learned_model, data_to_predict_wrong_comm), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVC and data set must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + template class BaseCSVCScore : public BaseCSVCMemberBase { }; @@ -686,3 +900,46 @@ TYPED_TEST(BaseCSVCScore, score_data_set_num_features_mismatch) { data.num_cols(), learned_model.num_features())); } + +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(BaseCSVCScore, predict_communicator_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create C-SVC: must be done using the mock class since the csvc base class is pure virtual + const mock_csvc csvc{}; + + // mock the predict_values function -> since an exception should be triggered, the mocked function should never be called + // clang-format off + 
EXPECT_CALL(csvc, predict_values( + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>())).Times(0); + // clang-format on + + // create mismatching MPI communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set and previously learned model + const plssvm::classification_data_set data_to_predict{ this->get_data_filename() }; + const plssvm::classification_data_set data_to_predict_wrong_comm{ comm, this->get_data_filename() }; + const plssvm::classification_model learned_model{ this->get_model_filename() }; + const plssvm::classification_model learned_model_wrong_comm{ comm, this->get_model_filename() }; + + // calling the function with mismatching MPI communicators should throw + EXPECT_THROW_WHAT(std::ignore = csvc.score(learned_model_wrong_comm, data_to_predict), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVC and model must be identical!"); + EXPECT_THROW_WHAT(std::ignore = csvc.score(learned_model, data_to_predict_wrong_comm), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVC and data set must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif diff --git a/tests/svm/csvm.cpp b/tests/svm/csvm.cpp index c1cabf239..fb2caa9df 100644 --- a/tests/svm/csvm.cpp +++ b/tests/svm/csvm.cpp @@ -48,26 +48,6 @@ TEST(BaseCSVM, construct_from_parameter) { EXPECT_EQ(csvm.get_params(), params); } -TEST(BaseCSVM, construct_from_parameter_invalid_kernel_type) { - // create parameter - const plssvm::parameter params{ plssvm::kernel_type = static_cast(6) }; - - // create C-SVM: must be done using the mock class since the csvm base class is pure virtual - EXPECT_THROW_WHAT(mock_csvm{ params }, - plssvm::invalid_parameter_exception, - "Invalid kernel function with value 6 given!"); -} - -TEST(BaseCSVM, 
construct_from_parameter_invalid_gamma) { - // create parameter - const plssvm::parameter params{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial, plssvm::gamma = -1.0 }; - - // create C-SVM: must be done using the mock class since the csvm base class is pure virtual - EXPECT_THROW_WHAT(mock_csvm{ params }, - plssvm::invalid_parameter_exception, - "gamma must be greater than 0.0, but is -1!"); -} - TEST(BaseCSVM, construct_linear_from_named_parameters) { // correct parameter const plssvm::parameter params{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }; @@ -107,20 +87,6 @@ TEST(BaseCSVM, construct_rbf_from_named_parameters) { EXPECT_TRUE(csvm.get_params().equivalent(params)); } -TEST(BaseCSVM, construct_from_named_parameters_invalid_kernel_type) { - // create C-SVM: must be done using the mock class since the csvm base class is pure virtual - EXPECT_THROW_WHAT(mock_csvm{ plssvm::kernel_type = static_cast(6) }, - plssvm::invalid_parameter_exception, - "Invalid kernel function with value 6 given!"); -} - -TEST(BaseCSVM, construct_from_named_parameters_invalid_gamma) { - // create C-SVM: must be done using the mock class since the csvm base class is pure virtual - EXPECT_THROW_WHAT((mock_csvm{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial, plssvm::gamma = -1.0 }), - plssvm::invalid_parameter_exception, - "gamma must be greater than 0.0, but is -1!"); -} - TEST(BaseCSVM, get_target_platforms) { // create C-SVM: must be done using the mock class since the csvm base class is pure virtual const mock_csvm csvm{}; diff --git a/tests/svm/csvr.cpp b/tests/svm/csvr.cpp index 8b6ca0883..d11f5343e 100644 --- a/tests/svm/csvr.cpp +++ b/tests/svm/csvr.cpp @@ -22,13 +22,17 @@ #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/solver_types.hpp" // plssvm::solver_type -#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_INCLUSIVE_RANGE +#include "tests/custom_test_macros.hpp" // 
EXPECT_THROW_WHAT, EXPECT_THROW_WHAT_MATCHER, EXPECT_INCLUSIVE_RANGE #include "tests/naming.hpp" // naming::parameter_definition_to_name #include "tests/svm/mock_csvr.hpp" // mock_csvr #include "tests/types_to_test.hpp" // util::regression_label_type_classification_type_gtest #include "tests/utility.hpp" // util::{redirect_output, temporary_file, instantiate_template_file, generate_random_matrix, get_correct_data_file_labels} -#include "gmock/gmock.h" // EXPECT_CALL, EXPECT_THAT, ::testing::{An, Between, Return, HasSubstr} +#if defined(PLSSVM_HAS_MPI_ENABLED) + #include "mpi.h" // MPI_COMM_WORLD, MPI_Comm_dup, MPI_Comm_free +#endif + +#include "gmock/gmock.h" // EXPECT_CALL, EXPECT_THAT, ::testing::{An, Between, Return, HasSubstr, ContainsRegex} #include "gtest/gtest.h" // TEST, TYPED_TEST, TYPED_TEST_SUITE, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE, EXPECT_THAT, #include // std::size_t @@ -427,6 +431,64 @@ TYPED_TEST(BaseCSVRFit, fit_named_parameters_invalid_max_iter) { "max_iter must be greater than 0, but is 0!"); } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(BaseCSVRFit, fit_communicator_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::solver_type solver = TestFixture::fixture_solver; + constexpr plssvm::kernel_function_type kernel = TestFixture::fixture_kernel; + + // create C-SVR: must be done using the mock class since the csvr base class is pure virtual + const mock_csvr csvr{ plssvm::parameter{ plssvm::kernel_type = kernel } }; + + // since an exception should be triggered, the mocked function should never be called + // clang-format off + EXPECT_CALL(csvr, get_device_memory()).Times(0); + EXPECT_CALL(csvr, num_available_devices()).Times(0); +#if defined(PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE) + EXPECT_CALL(csvr, get_max_mem_alloc_size()).Times(0); +#endif + EXPECT_CALL(csvr, assemble_kernel_matrix( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An())) + .Times(0); + 
EXPECT_CALL(csvr, blas_level_3( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An(), + ::testing::An &>())) + .Times(0); + // clang-format on + + // create mismatching MPI communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set + plssvm::regression_data_set training_data{ comm, this->get_data_filename() }; + if constexpr (kernel == plssvm::kernel_function_type::chi_squared) { + // chi-squared is well-defined for non-negative values only + if (training_data.labels().has_value()) { + training_data = plssvm::regression_data_set{ comm, util::matrix_abs(training_data.data()), *training_data.labels() }; + } + } + + // calling the function with mismatching MPI communicators should throw + EXPECT_THROW_WHAT((std::ignore = csvr.fit(training_data, plssvm::solver = solver)), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVR and data set must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + TYPED_TEST(BaseCSVRFit, fit_no_label) { using label_type = typename TestFixture::fixture_label_type; constexpr plssvm::solver_type solver = TestFixture::fixture_solver; @@ -472,6 +534,112 @@ TYPED_TEST(BaseCSVRFit, fit_no_label) { "No labels given for training! 
Maybe the data is only usable for prediction?"); } +TYPED_TEST(BaseCSVRFit, fit_out_of_resources) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::solver_type solver = TestFixture::fixture_solver; + constexpr plssvm::kernel_function_type kernel = TestFixture::fixture_kernel; + + // this test is only really applicable for the automatic solver type + if constexpr (solver == plssvm::solver_type::automatic) { + // create C-SVC: must be done using the mock class since the csvr base class is pure virtual + const mock_csvr csvr{ plssvm::parameter{ plssvm::kernel_type = kernel } }; + + // override on call + using namespace plssvm::detail::literals; + ON_CALL(csvr, get_device_memory()).WillByDefault(::testing::Return(std::vector{ 512_MiB + 1_KiB, 512_MiB + 1_KiB })); + + // clang-format off + EXPECT_CALL(csvr, get_device_memory()).Times(1); + EXPECT_CALL(csvr, num_available_devices()).Times(1); +#if defined(PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE) + EXPECT_CALL(csvr, get_max_mem_alloc_size()).Times(1); +#endif + EXPECT_CALL(csvr, assemble_kernel_matrix( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An())) + .Times(0); + EXPECT_CALL(csvr, blas_level_3( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An(), + ::testing::An &>())) + .Times(0); + // clang-format on + + // create data set + plssvm::regression_data_set training_data{ this->get_data_filename() }; + if constexpr (kernel == plssvm::kernel_function_type::chi_squared) { + // chi-squared is well-defined for non-negative values only + if (training_data.labels().has_value()) { + training_data = plssvm::regression_data_set{ util::matrix_abs(training_data.data()), *training_data.labels() }; + } + } + + // call function -> should throw since we are out of resources + EXPECT_THROW_WHAT_MATCHER((std::ignore = csvr.fit(training_data, plssvm::solver = solver)), + plssvm::kernel_launch_resources, + 
::testing::ContainsRegex("Not enough device memory available on device.* even for the cg_implicit solver!")); + } +} + +TYPED_TEST(BaseCSVRFit, fit_device_memory_too_small) { + using label_type = typename TestFixture::fixture_label_type; + constexpr plssvm::solver_type solver = TestFixture::fixture_solver; + constexpr plssvm::kernel_function_type kernel = TestFixture::fixture_kernel; + + // this test is only really applicable for the automatic solver type + if constexpr (solver == plssvm::solver_type::automatic) { + // create C-SVC: must be done using the mock class since the csvr base class is pure virtual + const mock_csvr csvr{ plssvm::parameter{ plssvm::kernel_type = kernel } }; + + // override on call + using namespace plssvm::detail::literals; + ON_CALL(csvr, get_device_memory()).WillByDefault(::testing::Return(std::vector{ 1_KiB, 1_KiB })); + + // clang-format off + EXPECT_CALL(csvr, get_device_memory()).Times(1); + EXPECT_CALL(csvr, num_available_devices()).Times(0); +#if defined(PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE) + EXPECT_CALL(csvr, get_max_mem_alloc_size()).Times(0); +#endif + EXPECT_CALL(csvr, assemble_kernel_matrix( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An())) + .Times(0); + EXPECT_CALL(csvr, blas_level_3( + ::testing::An(), + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An(), + ::testing::An &>())) + .Times(0); + // clang-format on + + // create data set + plssvm::regression_data_set training_data{ this->get_data_filename() }; + if constexpr (kernel == plssvm::kernel_function_type::chi_squared) { + // chi-squared is well-defined for non-negative values only + if (training_data.labels().has_value()) { + training_data = plssvm::regression_data_set{ util::matrix_abs(training_data.data()), *training_data.labels() }; + } + } + + // call function -> should throw since we are out of resources + EXPECT_THROW_WHAT((std::ignore = csvr.fit(training_data, plssvm::solver = 
solver)), + plssvm::kernel_launch_resources, + "At least 512.00 MiB of memory must be available, but available are only 1.00 KiB!"); + } +} + template class BaseCSVRPredict : public BaseCSVRMemberBase { }; @@ -535,6 +703,49 @@ TYPED_TEST(BaseCSVRPredict, predict_num_feature_mismatch) { "Number of features per data point (2) must match the number of features per support vector of the provided model (4)!"); } +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(BaseCSVRPredict, predict_communicator_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create C-SVR: must be done using the mock class since the csvr base class is pure virtual + const mock_csvr csvr{}; + + // mock the predict_values function -> since an exception should be triggered, the mocked function should never be called + // clang-format off + EXPECT_CALL(csvr, predict_values( + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>())).Times(0); + // clang-format on + + // create mismatching MPI communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set and previously learned model + const plssvm::regression_data_set data_to_predict{ this->get_data_filename() }; + const plssvm::regression_data_set data_to_predict_wrong_comm{ comm, this->get_data_filename() }; + const plssvm::regression_model learned_model{ this->get_model_filename() }; + const plssvm::regression_model learned_model_wrong_comm{ comm, this->get_model_filename() }; + + // calling the function with mismatching MPI communicators should throw + EXPECT_THROW_WHAT(std::ignore = csvr.predict(learned_model_wrong_comm, data_to_predict), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVR and model must be identical!"); + EXPECT_THROW_WHAT(std::ignore = csvr.predict(learned_model, 
data_to_predict_wrong_comm), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVR and data set must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif + template class BaseCSVRScore : public BaseCSVRMemberBase { }; @@ -688,3 +899,46 @@ TYPED_TEST(BaseCSVRScore, score_data_set_num_features_mismatch) { data.num_cols(), learned_model.num_features())); } + +#if defined(PLSSVM_HAS_MPI_ENABLED) + +TYPED_TEST(BaseCSVRScore, predict_communicator_mismatch) { + using label_type = typename TestFixture::fixture_label_type; + + // create C-SVR: must be done using the mock class since the csvr base class is pure virtual + const mock_csvr csvr{}; + + // mock the predict_values function -> since an exception should be triggered, the mocked function should never be called + // clang-format off + EXPECT_CALL(csvr, predict_values( + ::testing::An(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>(), + ::testing::An &>())).Times(0); + // clang-format on + + // create mismatching MPI communicator + MPI_Comm duplicated_mpi_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &duplicated_mpi_comm); + const plssvm::mpi::communicator comm{ duplicated_mpi_comm }; + + // create data set and previously learned model + const plssvm::regression_data_set data_to_predict{ this->get_data_filename() }; + const plssvm::regression_data_set data_to_predict_wrong_comm{ comm, this->get_data_filename() }; + const plssvm::regression_model learned_model{ this->get_model_filename() }; + const plssvm::regression_model learned_model_wrong_comm{ comm, this->get_model_filename() }; + + // calling the function with mismatching MPI communicators should throw + EXPECT_THROW_WHAT(std::ignore = csvr.score(learned_model_wrong_comm, data_to_predict), + plssvm::mpi_exception, + "The MPI communicators provided to the C-SVR and model must be identical!"); + EXPECT_THROW_WHAT(std::ignore = csvr.score(learned_model, data_to_predict_wrong_comm), + 
plssvm::mpi_exception, + "The MPI communicators provided to the C-SVR and data set must be identical!"); + + MPI_Comm_free(&duplicated_mpi_comm); +} + +#endif diff --git a/tests/svm/mock_csvc.hpp b/tests/svm/mock_csvc.hpp index 1ddbeb407..eebc91d99 100644 --- a/tests/svm/mock_csvc.hpp +++ b/tests/svm/mock_csvc.hpp @@ -13,8 +13,9 @@ #define PLSSVM_TESTS_MOCK_CSVC_HPP_ #pragma once -#include "plssvm/svm/csvc.hpp" // plssvm::csvc -#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm #include "tests/svm/mock_csvm.hpp" // mock_csvm @@ -26,7 +27,7 @@ class mock_csvc final : virtual public plssvm::csvc, public: template explicit mock_csvc(Args... args) : - plssvm::csvm{ args... }, + plssvm::csvm{ plssvm::mpi::communicator{}, args... }, mock_csvm{ args... } { } }; diff --git a/tests/svm/mock_csvm.hpp b/tests/svm/mock_csvm.hpp index ff9fdad65..6d70f12e1 100644 --- a/tests/svm/mock_csvm.hpp +++ b/tests/svm/mock_csvm.hpp @@ -17,6 +17,7 @@ #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size, plssvm::detail::literals #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any #include "plssvm/matrix.hpp" // plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/svm/csvm.hpp" // plssvm::csvm @@ -34,7 +35,7 @@ class mock_csvm : virtual public plssvm::csvm { public: template explicit mock_csvm(Args &&...args) : - plssvm::csvm{ std::forward(args)... } { + plssvm::csvm{ plssvm::mpi::communicator{}, std::forward(args)... 
} { this->fake_functions(); } diff --git a/tests/svm/mock_csvr.hpp b/tests/svm/mock_csvr.hpp index e3f478189..3125ff2be 100644 --- a/tests/svm/mock_csvr.hpp +++ b/tests/svm/mock_csvr.hpp @@ -13,8 +13,9 @@ #define PLSSVM_TESTS_MOCK_CSVR_HPP_ #pragma once -#include "plssvm/svm/csvm.hpp" // plssvm::csvm -#include "plssvm/svm/csvr.hpp" // plssvm::csvr +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "tests/svm/mock_csvm.hpp" // mock_csvm @@ -26,7 +27,7 @@ class mock_csvr final : virtual public plssvm::csvr, public: template explicit mock_csvr(Args... args) : - plssvm::csvm{ args... }, + plssvm::csvm{ plssvm::mpi::communicator{}, args... }, mock_csvm{ args... } { } }; diff --git a/tests/svm_types.cpp b/tests/svm_types.cpp index ee25ec4b8..0c97ff096 100644 --- a/tests/svm_types.cpp +++ b/tests/svm_types.cpp @@ -10,6 +10,8 @@ #include "plssvm/svm_types.hpp" +#include "plssvm/exceptions/exceptions.hpp" // plssvm::invalid_file_format_exception + #include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_CONVERSION_FROM_STRING #include "gmock/gmock.h" // EXPECT_THAT, ::testing::Contains @@ -17,6 +19,7 @@ #include // std::istringstream #include // std::string_view +#include // std::ignore #include // std::vector // check whether the plssvm::svm_type -> std::string conversions are correct @@ -71,6 +74,11 @@ TEST(SvmType, svm_type_to_task_name) { EXPECT_EQ(plssvm::svm_type_to_task_name(plssvm::svm_type::csvr), std::string_view{ "regression" }); } +TEST(SvmType, svm_type_to_task_name_unknown) { + // try converting an unknown SVM type to a task name + EXPECT_EQ(plssvm::svm_type_to_task_name(static_cast(2)), std::string_view{ "unknown" }); +} + TEST(SvmType, svm_type_from_model_file) { // check a classification model file EXPECT_EQ(plssvm::svm_type_from_model_file(PLSSVM_TEST_PATH "/data/model/classification/6x4.libsvm.model"), 
plssvm::svm_type::csvc); @@ -78,3 +86,17 @@ TEST(SvmType, svm_type_from_model_file) { // check a regression model file EXPECT_EQ(plssvm::svm_type_from_model_file(PLSSVM_TEST_PATH "/data/model/regression/6x4.libsvm.model"), plssvm::svm_type::csvr); } + +TEST(SvmType, svm_type_from_model_file_missing_svm_type) { + // try getting the SVM type from an empty file won't work + EXPECT_THROW_WHAT(std::ignore = plssvm::svm_type_from_model_file(PLSSVM_TEST_PATH "/data/model/classification/invalid/missing_svm_type.libsvm.model"), + plssvm::invalid_file_format_exception, + R"(The provided model file is not a valid LIBSVM model file since "svm_type" is missing!)"); +} + +TEST(SvmType, svm_type_from_model_file_empty) { + // try getting the SVM type from an empty file won't work + EXPECT_THROW_WHAT(std::ignore = plssvm::svm_type_from_model_file(PLSSVM_TEST_PATH "/data/empty.txt"), + plssvm::invalid_file_format_exception, + R"(The provided model file is not a valid LIBSVM model file since "svm_type" AND "SV" are missing!)"); +} diff --git a/tests/utility.hpp b/tests/utility.hpp index 7e99ed8d1..7e9755259 100644 --- a/tests/utility.hpp +++ b/tests/utility.hpp @@ -30,7 +30,7 @@ #include "fmt/std.h" // format std::vector::operator[] proxy type #include "gtest/gtest.h" // FAIL -#ifdef __unix__ +#if defined(__unix__) #include // mkstemp #endif @@ -388,7 +388,7 @@ template * @return the randomly generated vector (`[[nodiscard]]`) */ template )> -[[nodiscard]] inline std::vector generate_random_vector(const std::size_t size, const std::pair range = { T{ -1.0 }, T{ 1.0 } }) { +[[nodiscard]] inline std::vector generate_random_vector(const std::size_t size, const std::pair range = { static_cast(-1.0), static_cast(1.0) }) { std::vector vec(size); // fill vectors with random values @@ -449,7 +449,7 @@ template &&std::is_signed_v -[[nodiscard]] inline matrix_type generate_random_matrix(const plssvm::shape shape, const std::pair range = { real_type{ -1.0 }, real_type{ 1.0 } }) { +[[nodiscard]] 
inline matrix_type generate_random_matrix(const plssvm::shape shape, const std::pair range = { static_cast(-1.0), static_cast(1.0) }) { static_assert(std::is_floating_point_v, "Only floating point types are allowed!"); // create random number generator @@ -477,7 +477,7 @@ template -[[nodiscard]] inline matrix_type generate_random_matrix(const plssvm::shape shape, const plssvm::shape padding, const std::pair range = { real_type{ -1.0 }, real_type{ 1.0 } }) { +[[nodiscard]] inline matrix_type generate_random_matrix(const plssvm::shape shape, const plssvm::shape padding, const std::pair range = { static_cast(-1.0), static_cast(1.0) }) { return matrix_type{ generate_random_matrix(shape, range), padding }; } @@ -775,7 +775,7 @@ template /** * @brief Call the function @p func for each type in the @p Variant. - * @brief The function @p func must have a templated overload of the `operator()()` function. + * @details The function @p func must have a templated overload of the `operator()()` function. 
* @tparam Variant the type of the std::variant * @tparam Func the type of the function to apply * @tparam Index the current index of the type the function should be applied to diff --git a/utility_scripts/generate_data.py b/utility_scripts/generate_data.py index 5e782b80e..a479f7498 100644 --- a/utility_scripts/generate_data.py +++ b/utility_scripts/generate_data.py @@ -179,16 +179,16 @@ def __call__(self, parser, namespace, values, option_string=None): # dump data in libsvm format if args.task == "classification": data_set = plssvm.ClassificationDataSet(samples[:args.samples, :], labels[:args.samples]) - data_set.save(file, plssvm.FileFormatType.LIBSVM) + data_set.save(file, format=plssvm.FileFormatType.LIBSVM) if args.test_samples>0: test_data_set = plssvm.ClassificationDataSet(samples[args.samples:, :], labels[args.samples:]) - test_data_set.save(file, plssvm.FileFormatType.LIBSVM) + test_data_set.save(file, format=plssvm.FileFormatType.LIBSVM) elif args.task == "regression": data_set = plssvm.RegressionDataSet(samples[:args.samples, :], labels[:args.samples]) - data_set.save(file, plssvm.FileFormatType.LIBSVM) + data_set.save(file, format=plssvm.FileFormatType.LIBSVM) if args.test_samples>0: test_data_set = plssvm.RegressionDataSet(samples[args.samples:, :], labels[args.samples:]) - test_data_set.save(file, plssvm.FileFormatType.LIBSVM) + test_data_set.save(file, format=plssvm.FileFormatType.LIBSVM) else: raise RuntimeError("Invalid type!") else: diff --git a/utility_scripts/performance_analysis.py b/utility_scripts/performance_analysis.py index fe99ea19b..675fb5202 100644 --- a/utility_scripts/performance_analysis.py +++ b/utility_scripts/performance_analysis.py @@ -68,10 +68,10 @@ def fit_model_with_timeout(csvm, data, eps): # create the data set train_data = plssvm.ClassificationDataSet(samples, labels, scaler=plssvm.MinMaxScaler(-1.0, 1.0)) - train_data.save(args.intermediate_train_file, plssvm.FileFormatType.LIBSVM) + 
train_data.save(args.intermediate_train_file, format=plssvm.FileFormatType.LIBSVM) # create a C-SVM using the provided parameters and the default, i.e., fastest backend and target platform - svm = plssvm.CSVC(params) + svm = plssvm.CSVC(params=params) plssvm.quiet() diff --git a/utility_scripts/plssvm_target_platforms.py b/utility_scripts/plssvm_target_platforms.py index 21b511912..f936e3470 100644 --- a/utility_scripts/plssvm_target_platforms.py +++ b/utility_scripts/plssvm_target_platforms.py @@ -123,7 +123,7 @@ def cond_print(msg=""): if "Intel" in vga: # extract the architecture hex-value from the lspci line regex_pattern = r"\[[0-9]+:(.*?)\]" - pci_value = re.search("\[[0-9]+:(.*?)\]", vga) + pci_value = re.search(regex_pattern, vga) if pci_value: value = pci_value.group(1) intel_gpus.append("0x{}".format(value))