SC-SGS
diff --git a/‎.jenkins/Jenkinsfile-tests‎
Lines changed: 2 additions & 0 deletions b/‎.jenkins/Jenkinsfile-tests‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 4 additions & 3 deletions b/‎CMakeLists.txt‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 11 additions & 5 deletions b/‎README.md‎
Lines changed: 11 additions & 5 deletions
diff --git a/‎include/plssvm/backends/OpenCL/detail/utility.hpp‎
Lines changed: 4 additions & 3 deletions b/‎include/plssvm/backends/OpenCL/detail/utility.hpp‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎include/plssvm/backends/SYCL/csvm.hpp‎
Lines changed: 8 additions & 2 deletions b/‎include/plssvm/backends/SYCL/csvm.hpp‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎include/plssvm/backends/SYCL/detail/utility.hpp‎
Lines changed: 6 additions & 4 deletions b/‎include/plssvm/backends/SYCL/detail/utility.hpp‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎include/plssvm/backends/SYCL/kernel_invocation_type.hpp‎
Lines changed: 46 additions & 0 deletions b/‎include/plssvm/backends/SYCL/kernel_invocation_type.hpp‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎include/plssvm/backends/SYCL/predict_kernel.hpp‎
Lines changed: 12 additions & 15 deletions b/‎include/plssvm/backends/SYCL/predict_kernel.hpp‎
Lines changed: 12 additions & 15 deletions
diff --git a/‎include/plssvm/backends/SYCL/q_kernel.hpp‎
Lines changed: 9 additions & 12 deletions b/‎include/plssvm/backends/SYCL/q_kernel.hpp‎
Lines changed: 9 additions & 12 deletions
@@ -198,6 +198,7 @@ pipeline {
                 }
             }
         }
+/*
         stage('build plssvm DPC++ Debug') {
             steps {
                 dir('plssvm') {
@@ -213,6 +214,7 @@ pipeline {
                 }
             }
         }
+*/
     }
     post {
         always {
 
@@ -6,10 +6,10 @@
 
 cmake_minimum_required(VERSION 3.18)
 
-project("PLSSVM - Parallel Least-Squares Support Vector Machine"
-        VERSION 1.0.1
+project("PLSSVM - Parallel Least Squares Support Vector Machine"
+        VERSION 1.1.0
         LANGUAGES CXX
-        DESCRIPTION "A Support Vector Machine implementation using different backends.")
+        DESCRIPTION "A Least Squares Support Vector Machine implementation using different backends.")
 
 
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/add_custom_build_type.cmake)
@@ -39,6 +39,7 @@ set(PLSSVM_BASE_SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/parameter_predict.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/parameter_train.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/target_platforms.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/src/plssvm/backends/SYCL/kernel_invocation_type.cpp
 )
 
 ## create base library: linked against all backend libraries
 
@@ -259,9 +259,9 @@ LS-SVM with multiple (GPU-)backends
 Usage:
   ./svm-train [OPTION...] training_set_file [model_file]
 
-  -t, --kernel_type arg         set type of kernel function.
+  -t, --kernel_type arg         set type of kernel function. 
                                          0 -- linear: u'*v
-                                         1 -- polynomial: (gamma*u'*v + coef0)^degree
+                                         1 -- polynomial: (gamma*u'*v + coef0)^degree 
                                          2 -- radial basis function: exp(-gamma*|u-v|^2) (default: 0)
   -d, --degree arg              set degree in kernel function (default: 3)
   -g, --gamma arg               set gamma in kernel function (default: 1 / num_features)
@@ -270,11 +270,13 @@ Usage:
   -e, --epsilon arg             set the tolerance of termination criterion (default: 0.001)
   -b, --backend arg             choose the backend: openmp|cuda|opencl|sycl (default: openmp)
   -p, --target_platform arg     choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic)
+      --sycl_kernel_invocation_type arg
+                                choose the kernel invocation type when using SYCL as backend: automatic|nd_range|hierarchical (default: automatic)
   -q, --quiet                   quiet mode (no outputs)
   -h, --help                    print this helper message
       --input training_set_file
-
-      --model model_file
+                                
+      --model model_file  
 ```
 
 An example invocation using the CUDA backend could look like:
@@ -289,13 +291,17 @@ Another example targeting NVIDIA GPUs using the SYCL backend looks like:
 ./svm-train --backend sycl --target_platform gpu_nvidia --input /path/to/data_file
 ```
 
-The `--target_platform=automatic` flags works for the different backends as follows:
+The `--target_platform=automatic` flag works for the different backends as follows:
 
 - `OpenMP`: always selects a CPU
 - `CUDA`: always selects an NVIDIA GPU (if no NVIDIA GPU is available, throws an exception)
 - `OpenCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU
 - `SYCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU
 
+The `--sycl_kernel_invocation_type` flag is only used if the `--backend` is `sycl`; otherwise, a warning is emitted on `stderr`.
+If the `--sycl_kernel_invocation_type` is `automatic`, the `nd_range` invocation type is always used, 
+except for hipSYCL on CPUs where the hierarchical formulation is used instead.
+
 ### Predicting
 
 ```bash
 
@@ -46,17 +46,18 @@ namespace plssvm::opencl::detail {
 void device_assert(error_code code, std::string_view msg = "");
 
 /**
- * @brief Returns the list devices matching the target platform @p target.
+ * @brief Returns the list of devices matching the target platform @p target and the actually used target platform
+ *        (only interesting if the provided @p target was automatic).
  * @details If the selected target platform is `plssvm::target_platform::automatic` the selector tries to find devices in the following order:
  *          1. NVIDIA GPUs
  *          2. AMD GPUs
  *          3. Intel GPUs
  *          4. CPUs
  *
  * @param[in] target the target platform for which the devices must match
- * @return the command queues (`[[nodiscard]]`)
+ * @return the command queues and used target platform (`[[nodiscard]]`)
  */
-[[nodiscard]] std::vector<command_queue> get_command_queues(target_platform target);
+[[nodiscard]] std::pair<std::vector<command_queue>, target_platform> get_command_queues(target_platform target);
 
 /**
  * @brief Wait for the compute device associated with @p queue to finish.
 
@@ -11,8 +11,9 @@
 
 #pragma once
 
-#include "plssvm/backends/SYCL/detail/device_ptr.hpp"  // plssvm::sycl::detail::device_ptr
-#include "plssvm/backends/gpu_csvm.hpp"                // plssvm::detail::gpu_csvm
+#include "plssvm/backends/SYCL/detail/device_ptr.hpp"       // plssvm::sycl::detail::device_ptr
+#include "plssvm/backends/SYCL/kernel_invocation_type.hpp"  // plssvm::sycl::kernel_invocation_type
+#include "plssvm/backends/gpu_csvm.hpp"                     // plssvm::detail::gpu_csvm
 
 #include "sycl/sycl.hpp"  // sycl::queue
 
@@ -45,6 +46,7 @@ class csvm : public ::plssvm::detail::gpu_csvm<T, ::plssvm::sycl::detail::device
     using base_type::coef0_;
     using base_type::cost_;
     using base_type::degree_;
+    using base_type::dept_;
     using base_type::gamma_;
     using base_type::kernel_;
     using base_type::num_data_points_;
@@ -105,6 +107,10 @@ class csvm : public ::plssvm::detail::gpu_csvm<T, ::plssvm::sycl::detail::device
      * @copydoc plssvm::detail::gpu_csvm::run_predict_kernel
      */
     void run_predict_kernel(const ::plssvm::detail::execution_range &range, device_ptr_type &out_d, const device_ptr_type &alpha_d, const device_ptr_type &point_d, std::size_t num_predict_points) final;
+
+  private:
+    /// The SYCL kernel invocation type for the svm kernel. Either nd_range or hierarchical.
+    kernel_invocation_type invocation_type_;
 };
 
 extern template class csvm<float>;
 
@@ -15,22 +15,24 @@
 
 #include "sycl/sycl.hpp"  // sycl::queue
 
-#include <vector>  // std::vector
+#include <utility>  // std::pair
+#include <vector>   // std::vector
 
 namespace plssvm::sycl::detail {
 
 /**
- * @brief Returns the list devices matching the target platform @p target.
+ * @brief Returns the list of devices matching the target platform @p target and the actually used target platform
+ *        (only interesting if the provided @p target was automatic).
  * @details If the selected target platform is `plssvm::target_platform::automatic` the selector tries to find devices in the following order:
  *          1. NVIDIA GPUs
  *          2. AMD GPUs
  *          3. Intel GPUs
  *          4. CPUs
  *
  * @param[in] target the target platform for which the devices must match
- * @return the devices (`[[nodiscard]]`)
+ * @return the devices and used target platform (`[[nodiscard]]`)
  */
-[[nodiscard]] std::vector<::sycl::queue> get_device_list(target_platform target);
+[[nodiscard]] std::pair<std::vector<::sycl::queue>, target_platform> get_device_list(target_platform target);
 /**
  * @brief Wait for the compute device associated with @p queue to finish.
  * @param[in] queue the SYCL queue to synchronize
 
@@ -0,0 +1,46 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines all available kernel invocation types when using SYCL.
+ */
+
+#pragma once
+
+#include <iosfwd>  // forward declare std::ostream and std::istream
+
+namespace plssvm::sycl {
+
+/**
+ * @brief Enum class for all possible SYCL kernel invocation types.
+ */
+enum class kernel_invocation_type {
+    /** Use the best kernel invocation type for the current SYCL implementation and target hardware platform. */
+    automatic,
+    /** Use the [*nd_range* invocation type](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke). */
+    nd_range,
+    /** Use the SYCL specific [hierarchical invocation type](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_hierarchical_invoke). */
+    hierarchical
+};
+
+/**
+ * @brief Output the @p invocation type to the given output-stream @p out.
+ * @param[in,out] out the output-stream to write the kernel invocation type to
+ * @param[in] invocation the SYCL kernel invocation type
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, kernel_invocation_type invocation);
+
+/**
+ * @brief Use the input-stream @p in to initialize the @p invocation type.
+ * @param[in,out] in input-stream to extract the kernel invocation type from
+ * @param[out] invocation the SYCL kernel invocation type
+ * @return the input-stream
+ */
+std::istream &operator>>(std::istream &in, kernel_invocation_type &invocation);
+
+}  // namespace plssvm::sycl
@@ -44,12 +44,11 @@ class device_kernel_w_linear {
 
     /**
      * @brief Function call operator overload performing the actual calculation.
-     * @param[in] nd_idx the [`sycl::item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class)
-     *                   identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
+     * @param[in] index the [`sycl::id`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#id-class)
+     *                  identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
      */
-    void operator()(::sycl::nd_item<1> nd_idx) const {
-        const kernel_index_type index = nd_idx.get_global_linear_id();
-        real_type temp = 0;
+    void operator()(::sycl::id<1> index) const {
+        real_type temp{ 0.0 };
         if (index < num_features_) {
             for (kernel_index_type dat = 0; dat < num_data_points_ - 1; ++dat) {
                 temp += alpha_d_[dat] * data_d_[dat + (num_data_points_ - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * index];
@@ -99,12 +98,11 @@ class device_kernel_predict_poly {
 
     /**
      * @brief Function call operator overload performing the actual calculation.
-     * @param[in] nd_idx the [`sycl::item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class)
-     *                   identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
+     * @param[in] idx the [`sycl::nd_item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#nditem-class) identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
      */
-    void operator()(::sycl::nd_item<2> nd_idx) const {
-        const kernel_index_type data_point_index = nd_idx.get_global_id(0);
-        const kernel_index_type predict_point_index = nd_idx.get_global_id(1);
+    void operator()(::sycl::nd_item<2> idx) const {
+        const kernel_index_type data_point_index = idx.get_global_id(0);
+        const kernel_index_type predict_point_index = idx.get_global_id(1);
 
         real_type temp = 0;
         if (predict_point_index < num_predict_points_) {
@@ -165,12 +163,11 @@ class device_kernel_predict_radial {
 
     /**
      * @brief Function call operator overload performing the actual calculation.
-     * @param[in] nd_idx the [`sycl::item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class)
-     *                   identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
+     * @param[in] idx the [`sycl::nd_item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#nditem-class) identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
      */
-    void operator()(::sycl::nd_item<2> nd_idx) const {
-        const kernel_index_type data_point_index = nd_idx.get_global_id(0);
-        const kernel_index_type predict_point_index = nd_idx.get_global_id(1);
+    void operator()(::sycl::nd_item<2> idx) const {
+        const kernel_index_type data_point_index = idx.get_global_id(0);
+        const kernel_index_type predict_point_index = idx.get_global_id(1);
 
         real_type temp = 0;
         if (predict_point_index < num_predict_points_) {
 
@@ -41,11 +41,10 @@ class device_kernel_q_linear {
 
     /**
      * @brief Function call operator overload performing the actual calculation.
-     * @param[in] item the [`sycl::item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class)
-     *                 identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
+     * @param[in] index the [`sycl::id`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#id-class)
+     *                  identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
      */
-    void operator()(::sycl::nd_item<1> item) const {
-        const kernel_index_type index = item.get_global_linear_id();
+    void operator()(::sycl::id<1> index) const {
         real_type temp{ 0.0 };
         for (kernel_index_type i = 0; i < feature_range_; ++i) {
             temp += data_d_[i * num_rows_ + index] * data_last_[i];
@@ -88,11 +87,10 @@ class device_kernel_q_poly {
 
     /**
      * @brief Function call operator overload performing the actual calculation.
-     * @param[in] item the [`sycl::item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class)
-     *                 identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
+     * @param[in] index the [`sycl::id`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#id-class)
+     *                  identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
      */
-    void operator()(::sycl::nd_item<1> item) const {
-        const kernel_index_type index = item.get_global_linear_id();
+    void operator()(::sycl::id<1> index) const {
         real_type temp{ 0.0 };
         for (kernel_index_type i = 0; i < num_cols_; ++i) {
             temp += data_d_[i * num_rows_ + index] * data_last_[i];
@@ -136,11 +134,10 @@ class device_kernel_q_radial {
 
     /**
      * @brief Function call operator overload performing the actual calculation.
-     * @param[in] item the [`sycl::item`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class)
-     *                 identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
+     * @param[in] index the [`sycl::id`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#id-class)
+     *                  identifying an instance of the functor executing at each point in a [`sycl::range`](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class)
      */
-    void operator()(::sycl::nd_item<1> item) const {
-        const kernel_index_type index = item.get_global_linear_id();
+    void operator()(::sycl::id<1> index) const {
         real_type temp{ 0.0 };
         for (kernel_index_type i = 0; i < num_cols_; ++i) {
             temp += (data_d_[i * num_rows_ + index] - data_last_[i]) * (data_d_[i * num_rows_ + index] - data_last_[i]);
Original file line number	Diff line number	Diff line change
`@@ -198,6 +198,7 @@ pipeline {`
`198`	`198`	`}`
`199`	`199`	`}`
`200`	`200`	`}`
	`201`	`+/*`
`201`	`202`	`stage('build plssvm DPC++ Debug') {`
`202`	`203`	`steps {`
`203`	`204`	`dir('plssvm') {`
`@@ -213,6 +214,7 @@ pipeline {`
`213`	`214`	`}`
`214`	`215`	`}`
`215`	`216`	`}`
	`217`	`+*/`
`216`	`218`	`}`
`217`	`219`	`post {`
`218`	`220`	`always {`