From b7adea4698ca8f8d91e501169674b8100f4528ea Mon Sep 17 00:00:00 2001 From: "sewon.jeon" Date: Sun, 8 Feb 2026 17:58:11 +0900 Subject: [PATCH 1/2] Refactor NSS and DNSS implementations for improved readability and modularity - Rewrote legacy code into clean C++17 style. - Added missing method implementations and input validation. - Introduced options struct for configurable parameters in NSS and DNSS. - Implemented CUDA support for DNSS rotational feature computation. - Added new CMake configuration for building with or without CUDA. - Updated README to reflect changes and provide build instructions. - Added .gitignore to exclude build directories. --- .gitignore | 2 + CMakeLists.txt | 21 ++ NSS.cpp | 855 +++++++++++++++++++++++++++++++------------------ NSS.h | 285 ++++++++++------- README.md | 60 +++- dnss_cuda.cu | 217 +++++++++++++ 6 files changed, 997 insertions(+), 443 deletions(-) create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 dnss_cuda.cu diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3b912ee --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +build/ +build-cuda/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..c0dd088 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.20) + +project(DNSS LANGUAGES CXX) + +option(DNSS_ENABLE_CUDA "Enable CUDA acceleration for DNSS rotational feature computation" OFF) + +add_library(dnss NSS.cpp NSS.h) +target_include_directories(dnss PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_compile_features(dnss PUBLIC cxx_std_17) + +if(DNSS_ENABLE_CUDA) + enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) + if(NOT CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() + target_sources(dnss PRIVATE dnss_cuda.cu) + target_compile_definitions(dnss PRIVATE DNSS_HAS_CUDA=1) + target_link_libraries(dnss PUBLIC CUDA::cudart) + set_target_properties(dnss PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON) +endif() diff --git a/NSS.cpp b/NSS.cpp index 84e3e20..5d4e4ae 100644 --- a/NSS.cpp +++ b/NSS.cpp @@ -1,402 +1,637 @@ +#include "NSS.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +constexpr float kPi = 3.14159265359f; +constexpr float kTwoPi = 2.0f * kPi; +constexpr float kEpsilon = 1e-7f; -#include -#include -#include -#include -#include +float clampFloat(float value, float min_value, float max_value) +{ + return std::max(min_value, std::min(value, max_value)); +} -void NSS::normalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals) +float dotProduct(const glm::fvec3 &lhs, const glm::fvec3 &rhs) { - std::vector> normbuckets; - sort_into_buckets(m_original_normals, normbuckets); - int ndesired = int(ceil(sample_rate * m_original_vertices.size())); - sampled_vertices.clear(); - sampled_normals.clear(); - while (sampled_vertices.size() < ndesired) + return lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z; +} + +glm::fvec3 crossProduct(const glm::fvec3 &lhs, const glm::fvec3 &rhs) +{ + return glm::fvec3( + lhs.y * rhs.z - lhs.z * rhs.y, + lhs.z * rhs.x - lhs.x * rhs.z, + lhs.x * rhs.y - lhs.y * rhs.x); +} + +float vectorLength(const glm::fvec3 &value) +{ + return std::sqrt(dotProduct(value, value)); +} + +glm::fvec3 normalizeSafe(const glm::fvec3 &value) +{ + const float norm = vectorLength(value); + if (!std::isfinite(norm) || norm <= kEpsilon) { - for (int i = 0; i < normbuckets.size(); i++) - { - if (!normbuckets[i].empty()) - { - int ind = normbuckets[i].back(); - sampled_vertices.emplace_back(m_original_vertices[ind]); - sampled_normals.emplace_back(m_original_normals[ind]); - normbuckets[i].pop_back(); - } - } + return glm::fvec3(0.0f, 0.0f, 1.0f); } + return value / norm; } -void NSS::sort_into_buckets(const std::vector &n, std::vector> &normbuckets) +float computeRotationalReturnValue(const glm::fvec3 &normalized_point, const glm::fvec3 &normal, float theta) { - const int Q = 4; - const float Qsqrt1_2 = 2.8284f; - normbuckets.resize(3 * Q * Q); - for (int i = 0; i < n.size(); i++) + const float point_norm = vectorLength(normalized_point); + if (!std::isfinite(point_norm) || point_norm <= kEpsilon) { - const float *N = &n[i][0]; + return 0.0f; + } - float ax = fabs(N[0]), ay = fabs(N[1]), az = fabs(N[2]); - int A; - float u, v; - if (ax > ay) - { - if (ax > az) - { - A = 0; - u = (N[0] > 0) ? N[1] : -N[1]; - v = (N[0] > 0) ? N[2] : -N[2]; - } - else - { - A = 2; - u = (N[2] > 0) ? N[0] : -N[0]; - v = (N[2] > 0) ? N[1] : -N[1]; - } - } - else + const glm::fvec3 point_dir = normalized_point / point_norm; + const glm::fvec3 normal_dir = normalizeSafe(normal); + const float dot_pn = clampFloat(dotProduct(point_dir, normal_dir), -1.0f, 1.0f); + const float beta = std::acos(dot_pn); + + const float pp = 2.0f * point_norm * std::sin(theta * 0.5f); + const float sin_beta = std::sin(beta); + const float cos_beta = std::cos(beta); + + const float pq_positive = pp * std::cos(beta - theta * 0.5f); + const float pq_negative = pp * std::cos(-beta - theta * 0.5f); + + const float denominator_positive = std::max(kEpsilon, point_norm - pq_positive * cos_beta); + const float denominator_negative = std::max(kEpsilon, point_norm - pq_negative * cos_beta); + + const float atan_positive = std::atan((pq_positive * sin_beta) / denominator_positive); + const float atan_negative = std::atan((pq_negative * (-sin_beta)) / denominator_negative); + + const float gamma_positive = theta - atan_positive; + const float gamma_negative = theta - atan_negative; + + const float rr_positive = point_norm * gamma_positive / theta; + const float rr_negative = point_norm * gamma_negative / theta; + const float rotational_return = std::max(rr_positive, rr_negative); + + if (!std::isfinite(rotational_return)) + { + return 0.0f; + } + return std::max(0.0f, rotational_return); +} + +int sphericalBucketIndex( + const glm::fvec3 &raw_normal, + int azimuth_bins, + int z_bins, + bool fold_antipodal) +{ + if (azimuth_bins <= 0 || z_bins <= 0) + { + return -1; + } + + glm::fvec3 normal = normalizeSafe(raw_normal); + + if (fold_antipodal) + { + const bool flip = + normal.z < 0.0f || + (normal.z == 0.0f && (normal.y < 0.0f || (normal.y == 0.0f && normal.x < 0.0f))); + if (flip) { - if (ay > az) - { - A = 1; - u = (N[1] > 0) ? N[2] : -N[2]; - v = (N[1] > 0) ? N[0] : -N[0]; - } - else - { - A = 2; - u = (N[2] > 0) ? N[0] : -N[0]; - v = (N[2] > 0) ? N[1] : -N[1]; - } + normal = -normal; } - int U = int(u * Qsqrt1_2) + (Q / 2); - int V = int(v * Qsqrt1_2) + (Q / 2); - normbuckets[((A * Q) + U) * Q + V].push_back(i); } - for (int bucket = 0; bucket < normbuckets.size(); bucket++) - std::random_shuffle(normbuckets[bucket].begin(), normbuckets[bucket].end()); + + float azimuth = std::atan2(normal.y, normal.x); + if (azimuth < 0.0f) + { + azimuth += kTwoPi; + } + + const float z_min = fold_antipodal ? 0.0f : -1.0f; + const float z_normalized = clampFloat((normal.z - z_min) / (1.0f - z_min), 0.0f, 1.0f); + + int azimuth_index = static_cast(std::floor(azimuth / kTwoPi * static_cast(azimuth_bins))); + int z_index = static_cast(std::floor(z_normalized * static_cast(z_bins))); + + if (azimuth_index >= azimuth_bins) + { + azimuth_index = azimuth_bins - 1; + } + if (z_index >= z_bins) + { + z_index = z_bins - 1; + } + + return azimuth_index * z_bins + z_index; } -void DNSS::dualNormalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals) +void computeCenteredAndScaledVertices( + const std::vector &vertices, + std::vector &normalized_vertices) { + normalized_vertices.clear(); + normalized_vertices.resize(vertices.size(), glm::fvec3(0.0f)); + if (vertices.empty()) + { + return; + } - // Timer timer; - // timer.start(); - computeCentroidandNormalize(); - computeRotationalNormals(); - computeRotationalReturn(); + glm::fvec3 centroid(0.0f, 0.0f, 0.0f); + for (const glm::fvec3 &vertex : vertices) + { + centroid += vertex; + } + centroid /= static_cast(vertices.size()); - sortIntoBucket(); - initBucketB(); - int ndesired = int(ceil(sample_rate * m_vertices_original.size())); + float max_radius = 0.0f; + for (std::size_t i = 0; i < vertices.size(); ++i) + { + normalized_vertices[i] = vertices[i] - centroid; + max_radius = std::max(max_radius, vectorLength(normalized_vertices[i])); + } - sampled_vertices.clear(); - sampled_normals.clear(); - sampled_vertices.reserve(ndesired); - sampled_normals.reserve(ndesired); + if (max_radius <= kEpsilon) + { + return; + } - while (sampled_vertices.size() < ndesired) + const float inv_radius = 1.0f / max_radius; + for (glm::fvec3 &vertex : normalized_vertices) { - int pid = pickPoint(); + vertex *= inv_radius; + } +} + +} // namespace + +#if defined(DNSS_HAS_CUDA) +bool DNSSComputeRotationalFeaturesCuda( + const std::vector &normalized_vertices, + const std::vector &normals, + float theta_for_return_radians, + std::vector *rotational_normals, + std::vector *rotational_returns); +#endif + +NSS::NSS() + : NSS(Options{}) +{ +} + +NSS::NSS(const Options &options) + : m_options(options), m_runtime_seed(options.random_seed) +{ +} + +void NSS::setOptions(const Options &options) +{ + m_options = options; + m_runtime_seed = options.random_seed; +} + +const NSS::Options &NSS::getOptions() const +{ + return m_options; +} - sampled_vertices.emplace_back(m_vertices_original[pid]); - sampled_normals.emplace_back(m_normals_original[pid]); - // updateBucketOrder(); +void NSS::setInputCloud(const std::vector &original_vertices, const std::vector &original_normals) +{ + if (original_vertices.size() != original_normals.size()) + { + throw std::invalid_argument("NSS::setInputCloud requires matching vertex and normal counts."); } - sampled_vertices.shrink_to_fit(); - sampled_normals.shrink_to_fit(); - // timer.stop(); - // std::wcout << L"DNSS sampling time : " << timer.peek_msec() << L" (ms)" << std::endl; + m_original_vertices = original_vertices; + m_original_normals = original_normals; } -void DNSS::computeRotationalReturn() +void NSS::normalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals) { - int nSize = static_cast(m_vertices_normalized.size()); - m_rotationalReturns.clear(); - m_rotationalReturns.resize(nSize); + sampled_vertices.clear(); + sampled_normals.clear(); + + const int target_count = targetSampleCount(sample_rate); + if (target_count <= 0) + { + return; + } + + const int bucket_count = m_options.azimuth_bins * m_options.z_bins; + if (bucket_count <= 0) + { + throw std::invalid_argument("NSS bucket configuration must be positive."); + } + + std::vector> buckets(static_cast(bucket_count)); + for (std::size_t index = 0; index < m_original_normals.size(); ++index) + { + int bucket_index = normalToBucketIndex(m_original_normals[index]); + if (bucket_index < 0 || bucket_index >= bucket_count) + { + bucket_index = 0; + } + buckets[static_cast(bucket_index)].push_back(static_cast(index)); + } - Concurrency::parallel_for(0, nSize, [&](int idx) - { - float dot_pn = glm::dot(glm::normalize(m_vertices_normalized[idx]), m_normals_original[idx]); - float beta = acos(dot_pn);// / (glm::l2Norm(m_vertices_normalized[idx])*glm::l2Norm(m_normals_original[idx]))); - float p_abs = glm::l2Norm(m_vertices_normalized[idx]); - float pp_abs = 2 * p_abs *sin(m_thetaForReturn / 2); + std::mt19937 rng(m_options.deterministic ? m_options.random_seed : nextSeed()); + for (std::vector &bucket : buckets) + { + std::shuffle(bucket.begin(), bucket.end(), rng); + } - float pq_abs_positive = pp_abs * cos(beta - m_thetaForReturn / 2); - float pq_abs_negative = pp_abs * cos(-beta - m_thetaForReturn / 2); - float sin_beta = sin(beta); - float cos_beta = cos(beta); + sampled_vertices.reserve(static_cast(target_count)); + sampled_normals.reserve(static_cast(target_count)); - float atanbeta_positive = atan((pq_abs_positive*sin_beta) / (p_abs - pq_abs_positive * cos_beta)); - float gamma_positive = m_thetaForReturn - atanbeta_positive; + while (sampled_vertices.size() < static_cast(target_count)) + { + bool selected_any = false; + for (std::vector &bucket : buckets) + { + if (sampled_vertices.size() >= static_cast(target_count)) + { + break; + } + if (bucket.empty()) + { + continue; + } - float atanbeta_negative = atan((pq_abs_negative*(-sin_beta)) / (p_abs - pq_abs_negative * cos_beta)); - float gamma_negative = m_thetaForReturn - atanbeta_negative; + selected_any = true; + const int point_index = bucket.back(); + bucket.pop_back(); - float rotationalReturnPositive = p_abs * gamma_positive / m_thetaForReturn; - float rotationalReturnNegative = p_abs * gamma_negative / m_thetaForReturn; + sampled_vertices.push_back(m_original_vertices[static_cast(point_index)]); + sampled_normals.push_back(m_original_normals[static_cast(point_index)]); + } - float RR = fmax(rotationalReturnPositive, rotationalReturnNegative); - m_rotationalReturns[idx] = RR; }); + if (!selected_any) + { + break; + } + } } -void DNSS::initBucketB() + +int NSS::normalToBucketIndex(const glm::fvec3 &normal) const { - m_bucketList_T.clear(); - m_bucketList_R.clear(); - m_bucketList_T.resize(m_bucketTranslation.size()); - m_bucketList_R.resize(m_bucketRotation.size()); - const int bucketsize_R = static_cast(m_bucketRotation.size()); - const int bucketsize_T = static_cast(m_bucketTranslation.size()); - for (int idx = 0; idx < bucketsize_R; idx++) - { - m_bucketList_R[idx].bucketIndex = idx; - // m_bucketRotation[idx].clear(); - if (m_bucketRotation[idx].empty()) - m_bucketList_R[idx].constraints = FLT_MAX; - else - m_bucketList_R[idx].constraints = 0.0f; - } + return sphericalBucketIndex(normal, m_options.azimuth_bins, m_options.z_bins, false); +} - for (int idx = 0; idx < bucketsize_T; idx++) +int NSS::targetSampleCount(float sample_rate) const +{ + if (!std::isfinite(sample_rate) || sample_rate <= 0.0f || m_original_vertices.empty()) { - m_bucketList_T[idx].bucketIndex = idx; - // m_bucketTranslation[idx].clear(); - if (m_bucketTranslation[idx].empty()) - m_bucketList_T[idx].constraints = FLT_MAX; - else - m_bucketList_T[idx].constraints = 0.0f; + return 0; } - typeBucket r = typeBucket::Rotation, t = typeBucket::Translation; - updateBucketOrder(r); - updateBucketOrder(t); + + const float clamped_rate = std::min(1.0f, sample_rate); + const int count = static_cast(std::ceil(clamped_rate * static_cast(m_original_vertices.size()))); + return std::clamp(count, 0, static_cast(m_original_vertices.size())); +} + +std::uint32_t NSS::nextSeed() +{ + m_runtime_seed = m_runtime_seed * 1664525u + 1013904223u; + return m_runtime_seed; +} + +DNSS::DNSS() + : DNSS(Options{}) +{ +} + +DNSS::DNSS(const Options &options) + : m_options(options), m_runtime_seed(options.random_seed) +{ +} + +void DNSS::setOptions(const Options &options) +{ + m_options = options; + m_runtime_seed = options.random_seed; } -void DNSS::sortIntoBucket() +const DNSS::Options &DNSS::getOptions() const { - int nSizePoints = static_cast(m_vertices_normalized.size()); - std::wcout << L"niszeP_points : " << nSizePoints << std::endl; - int nSizeBucketR = m_bucketsizeR_azimuth * m_bucketsizeR_polar; - int nSizeBucketT = m_bucketsizeT_azimuth * m_bucketsizeT_polar; - // Put it into a concurrent vector, sort it, and put it back into a member variable. - // Reason: concurrent_vector pop_back not available - std::vector>> bucketRotation; - std::vector> bucketTranslation; - m_vecForBIdx.clear(); - m_vecForBIdx.resize(nSizePoints); - - bucketRotation.clear(); - bucketRotation.resize(nSizeBucketR); - bucketTranslation.clear(); - bucketTranslation.resize(nSizeBucketT); - - m_bucketRotation.clear(); - m_bucketRotation.resize(nSizeBucketR); - m_bucketTranslation.clear(); - m_bucketTranslation.resize(nSizeBucketT); - - // Bucket space - they don't fit perfectly evenly into the bucket and may be crowded to one side, - // so instead of dividing by the bucket size, divide by 10 (an arbitrary number) to make sure there's enough space. - Concurrency::parallel_for(0, nSizeBucketR, [&](int idx) - { bucketRotation[idx].reserve(nSizePoints / 10); }); - Concurrency::parallel_for(0, nSizeBucketT, [&](int idx) - { bucketTranslation[idx].reserve(nSizePoints / 10); }); - - Concurrency::parallel_for(0, nSizePoints, [&](int idx) - { - float coord_R_azimuth = 0.f; - float coord_R_polar = 0.f; - computeSphericalCoordinate(glm::normalize(m_normals_rotational[idx]), coord_R_azimuth, coord_R_polar); - int index_R_azimuth = static_cast(std::floorf(fabs(coord_R_azimuth) / m_thetaForSort)); - int index_R_polar = static_cast(std::floorf(coord_R_polar / m_thetaForSort)); - int index_R = index_R_azimuth * m_bucketsizeR_polar + index_R_polar; - - bucketRotation[index_R].push_back(std::make_pair(idx, m_rotationalReturns[idx])); - - float coord_T_azimuth = 0.f; - float coord_T_polar = 0.f; - computeSphericalCoordinate(glm::normalize(m_normals_original[idx]), coord_T_azimuth, coord_T_polar); - int index_T_azimuth = static_cast(std::floorf((coord_T_azimuth + m_pi_degree) / m_thetaForSort)); - int index_T_polar = static_cast(std::floorf(coord_T_polar / m_thetaForSort)); - int index_T = index_T_azimuth * m_bucketsizeT_polar + index_T_polar; - - bucketTranslation[index_T].push_back(idx); - - m_vecForBIdx[idx] = std::make_pair(index_R, index_T); }); - - Concurrency::parallel_for(0, nSizeBucketR, [&](int idx) - { - bucketRotation[idx].shrink_to_fit(); - m_bucketRotation[idx].resize(bucketRotation[idx].size()); - std::move(bucketRotation[idx].begin(), bucketRotation[idx].end(), m_bucketRotation[idx].begin()); }); - Concurrency::parallel_for(0, nSizeBucketT, [&](int idx) - { - bucketTranslation[idx].shrink_to_fit(); - m_bucketTranslation[idx].resize(bucketTranslation[idx].size()); - std::move(bucketTranslation[idx].begin(), bucketTranslation[idx].end(), m_bucketTranslation[idx].begin()); }); - - Concurrency::parallel_for(0, nSizeBucketR, [&](int idx) - { - //ascendin order sort because only pop_back is possible - std::sort(m_bucketRotation[idx].begin(), m_bucketRotation[idx].end(), - [](const std::pair& lhs, const std::pair& rhs) { - return lhs.second < rhs.second; - }); }); - - Concurrency::parallel_for(0, nSizeBucketT, [&](int idx) - { std::random_shuffle(m_bucketTranslation[idx].begin(), m_bucketTranslation[idx].end()); }); + return m_options; } -void DNSS::computeRotationalNormals() +void DNSS::setUseCuda(bool enable) { - int nSize = static_cast(m_normals_original.size()); - m_normals_rotational.resize(m_normals_original.size()); - Concurrency::parallel_for(0, nSize, [&](int idx) - { m_normals_rotational[idx] = glm::cross(m_vertices_normalized[idx], m_normals_original[idx]); }); + m_options.enable_cuda = enable; } -void DNSS::computeCentroidandNormalize() +bool DNSS::getUseCuda() const { - Eigen::Matrix3Xf vertices = Eigen::Map(&m_vertices_original[0].x, 3, m_vertices_original.size()); - Eigen::Vector3f centroid_eigen = vertices.rowwise().mean(); - glm::fvec3 centroid = (glm::fvec3 &)(*centroid_eigen.data()); - - int nSize = static_cast(m_vertices_original.size()); - m_vertices_normalized.clear(); - m_vertices_normalized.resize(nSize); - Concurrency::parallel_for(0, nSize, [&](int idx) - { m_vertices_normalized[idx] = m_vertices_original[idx] - centroid; }); - Eigen::Matrix3Xf vertices_moved = Eigen::Map(&m_vertices_normalized[0].x, 3, m_vertices_normalized.size()); - // normalize factor - Eigen::Vector3f centroid_eigen_moved = vertices_moved.rowwise().maxCoeff(); - float Lmax = vertices_moved.colwise().norm().maxCoeff(); - float L_inverse = 1 / Lmax; - Concurrency::parallel_for(0, nSize, [&](int idx) - { m_vertices_normalized[idx] = m_vertices_normalized[idx] * L_inverse; }); + return m_options.enable_cuda; } -int DNSS::pickPoint() +void DNSS::setInputCloud(const std::vector &original_vertices, const std::vector &original_normals) { - int bid_top = 0; - typeBucket bType_top, bType_another; + if (original_vertices.size() != original_normals.size()) + { + throw std::invalid_argument("DNSS::setInputCloud requires matching vertex and normal counts."); + } + m_vertices_original = original_vertices; + m_normals_original = original_normals; +} + +void DNSS::dualNormalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals) +{ + sampled_vertices.clear(); + sampled_normals.clear(); + + const int target_count = targetSampleCount(sample_rate); + if (target_count <= 0) + { + return; + } + + const int t_bucket_count = m_options.t_azimuth_bins * m_options.t_z_bins; + const int r_bucket_count = m_options.r_azimuth_bins * m_options.r_z_bins; + if (t_bucket_count <= 0 || r_bucket_count <= 0) + { + throw std::invalid_argument("DNSS bucket configuration must be positive."); + } - if (m_bucketList_R.front().constraints <= m_bucketList_T.front().constraints) + std::vector normalized_vertices; + computeCenteredAndScaledVertices(m_vertices_original, normalized_vertices); + + std::vector rotational_normals(m_normals_original.size(), glm::fvec3(0.0f)); + std::vector rotational_returns(m_normals_original.size(), 0.0f); + + bool cuda_used = false; +#if defined(DNSS_HAS_CUDA) + if (m_options.enable_cuda) { - bid_top = m_bucketList_R.front().bucketIndex; - bType_top = typeBucket::Rotation; - bType_another = typeBucket::Translation; + cuda_used = DNSSComputeRotationalFeaturesCuda( + normalized_vertices, + m_normals_original, + m_options.theta_for_return_radians, + &rotational_normals, + &rotational_returns); } - else +#endif + + if (!cuda_used) { - bid_top = m_bucketList_T.front().bucketIndex; - bType_top = typeBucket::Translation; - bType_another = typeBucket::Rotation; + for (std::size_t i = 0; i < normalized_vertices.size(); ++i) + { + rotational_normals[i] = crossProduct(normalized_vertices[i], m_normals_original[i]); + rotational_returns[i] = computeRotationalReturnValue( + normalized_vertices[i], + m_normals_original[i], + m_options.theta_for_return_radians); + } } - int pid = -1; - switch (bType_top) + std::vector> t_buckets(static_cast(t_bucket_count)); + + struct RotationEntry { - case typeBucket::Rotation: + float rotational_return; + int point_index; + }; + struct RotationEntryComparator { - if (m_bucketRotation[bid_top].empty()) + bool operator()(const RotationEntry &lhs, const RotationEntry &rhs) const { - std::wcout << L"bucket id : " << bid_top << " and bucket size : " << m_bucketRotation[bid_top].size() << std::endl; - std::wcout << L"constraints : " << m_bucketList_R[bid_top].constraints << std::endl; + return lhs.rotational_return < rhs.rotational_return; } + }; - pid = m_bucketRotation[bid_top].back().first; + using RotationQueue = std::priority_queue, RotationEntryComparator>; + std::vector r_buckets(static_cast(r_bucket_count)); - m_bucketList_R.front().constraints += m_bucketRotation[bid_top].back().second; - m_bucketRotation[bid_top].pop_back(); + std::vector point_to_t_bucket(m_vertices_original.size(), 0); + std::vector point_to_r_bucket(m_vertices_original.size(), 0); - int bid_T = m_vecForBIdx[pid].second; + for (std::size_t point_index = 0; point_index < m_vertices_original.size(); ++point_index) + { + int t_bucket = translationalBucketIndex(m_normals_original[point_index]); + int r_bucket = rotationalBucketIndex(rotational_normals[point_index]); - if (!m_bucketTranslation[bid_T].empty()) + if (t_bucket < 0 || t_bucket >= t_bucket_count) { - m_bucketTranslation[bid_T].erase(std::remove_if(m_bucketTranslation[bid_T].begin(), m_bucketTranslation[bid_T].end(), - [&pid](const int &elem) - { return elem == pid; }), - m_bucketTranslation[bid_T].end()); + t_bucket = 0; } - if (m_bucketTranslation[bid_T].empty()) + if (r_bucket < 0 || r_bucket >= r_bucket_count) { - auto it = std::find_if(m_bucketList_T.begin(), m_bucketList_T.end(), - [bid_T](const structBucket &element) - { return element.bucketIndex == bid_T; }); - it->constraints = FLT_MAX; - updateBucketOrder(bType_another); + r_bucket = 0; } - if (m_bucketRotation[bid_top].empty()) + t_buckets[static_cast(t_bucket)].push_back(static_cast(point_index)); + r_buckets[static_cast(r_bucket)].push( + RotationEntry{rotational_returns[point_index], static_cast(point_index)}); + + point_to_t_bucket[point_index] = t_bucket; + point_to_r_bucket[point_index] = r_bucket; + } + + std::mt19937 rng(m_options.deterministic ? m_options.random_seed : nextSeed()); + for (std::vector &bucket : t_buckets) + { + std::shuffle(bucket.begin(), bucket.end(), rng); + } + + std::vector t_constraints(static_cast(t_bucket_count), 0.0f); + std::vector r_constraints(static_cast(r_bucket_count), 0.0f); + std::vector is_active(m_vertices_original.size(), 1u); + + sampled_vertices.reserve(static_cast(target_count)); + sampled_normals.reserve(static_cast(target_count)); + + auto popTranslationalPoint = [&](int bucket_index) -> int + { + auto &bucket = t_buckets[static_cast(bucket_index)]; + while (!bucket.empty() && is_active[static_cast(bucket.back())] == 0u) + { + bucket.pop_back(); + } + if (bucket.empty()) { - m_bucketList_R.front().constraints = FLT_MAX; + return -1; + } + const int point_index = bucket.back(); + bucket.pop_back(); + return point_index; + }; + + auto popRotationalPoint = [&](int bucket_index, float *rotational_weight) -> int + { + auto &bucket = r_buckets[static_cast(bucket_index)]; + while (!bucket.empty() && is_active[static_cast(bucket.top().point_index)] == 0u) + { + bucket.pop(); + } + if (bucket.empty()) + { + *rotational_weight = 0.0f; + return -1; + } + const RotationEntry entry = bucket.top(); + bucket.pop(); + *rotational_weight = entry.rotational_return; + return entry.point_index; + }; + + auto selectPoint = [&](int point_index) + { + if (point_index < 0) + { + return; + } + if (is_active[static_cast(point_index)] == 0u) + { + return; + } + + is_active[static_cast(point_index)] = 0u; + sampled_vertices.push_back(m_vertices_original[static_cast(point_index)]); + sampled_normals.push_back(m_normals_original[static_cast(point_index)]); + + const int t_bucket = point_to_t_bucket[static_cast(point_index)]; + const int r_bucket = point_to_r_bucket[static_cast(point_index)]; + t_constraints[static_cast(t_bucket)] += 1.0f; + r_constraints[static_cast(r_bucket)] += rotational_returns[static_cast(point_index)]; + }; + + if (m_options.initialize_rotation_buckets) + { + for (int r_bucket = 0; r_bucket < r_bucket_count; ++r_bucket) + { + if (sampled_vertices.size() >= static_cast(target_count)) + { + break; + } + float weight = 0.0f; + const int point_index = popRotationalPoint(r_bucket, &weight); + (void)weight; + selectPoint(point_index); } - updateBucketOrder(bType_top); - break; } - case typeBucket::Translation: + + while (sampled_vertices.size() < static_cast(target_count)) { - if (m_bucketTranslation[bid_top].empty()) + enum class BucketType + { + None, + Translational, + Rotational + }; + + BucketType best_type = BucketType::None; + int best_bucket_index = -1; + float best_constraint = std::numeric_limits::max(); + + for (int t_bucket = 0; t_bucket < t_bucket_count; ++t_bucket) { - std::wcout << L"bucket id : " << bid_top << " and bucket size : " << m_bucketTranslation[bid_top].size() << std::endl; - std::wcout << L"constraints : " << m_bucketList_T[bid_top].constraints << std::endl; + auto &bucket = t_buckets[static_cast(t_bucket)]; + while (!bucket.empty() && is_active[static_cast(bucket.back())] == 0u) + { + bucket.pop_back(); + } + if (bucket.empty()) + { + continue; + } + + const float constraint = t_constraints[static_cast(t_bucket)]; + if (constraint < best_constraint) + { + best_constraint = constraint; + best_bucket_index = t_bucket; + best_type = BucketType::Translational; + } } - pid = m_bucketTranslation[bid_top].back(); - m_bucketList_T.front().constraints += 1.0f; - m_bucketTranslation[bid_top].pop_back(); + for (int r_bucket = 0; r_bucket < r_bucket_count; ++r_bucket) + { + auto &bucket = r_buckets[static_cast(r_bucket)]; + while (!bucket.empty() && is_active[static_cast(bucket.top().point_index)] == 0u) + { + bucket.pop(); + } + if (bucket.empty()) + { + continue; + } - int bid_R = m_vecForBIdx[pid].first; + const float constraint = r_constraints[static_cast(r_bucket)]; + if (constraint < best_constraint) + { + best_constraint = constraint; + best_bucket_index = r_bucket; + best_type = BucketType::Rotational; + } + } - if (!m_bucketRotation[bid_R].empty()) + if (best_type == BucketType::None) { - m_bucketRotation[bid_R].erase(std::remove_if(m_bucketRotation[bid_R].begin(), m_bucketRotation[bid_R].end(), - [&pid](const std::pair &elem) - { return elem.first == pid; }), - m_bucketRotation[bid_R].end()); + break; } - if (m_bucketRotation[bid_R].empty()) + + if (best_type == BucketType::Translational) { - auto it = std::find_if(m_bucketList_R.begin(), m_bucketList_R.end(), - [bid_R](const structBucket &element) - { return element.bucketIndex == bid_R; }); - it->constraints = FLT_MAX; - updateBucketOrder(bType_another); + selectPoint(popTranslationalPoint(best_bucket_index)); } - if (m_bucketTranslation[bid_top].empty()) + else { - m_bucketList_T.front().constraints = FLT_MAX; + float rotational_weight = 0.0f; + selectPoint(popRotationalPoint(best_bucket_index, &rotational_weight)); } - updateBucketOrder(bType_top); - break; } + + if (sampled_vertices.size() < static_cast(target_count)) + { + for (std::size_t point_index = 0; point_index < is_active.size(); ++point_index) + { + if (sampled_vertices.size() >= static_cast(target_count)) + { + break; + } + if (is_active[point_index] == 0u) + { + continue; + } + selectPoint(static_cast(point_index)); + } } - return pid; } -void DNSS::updateBucketOrder(typeBucket &bType) + +int DNSS::translationalBucketIndex(const glm::fvec3 &normal) const { - switch (bType) - { - case typeBucket::Rotation: - std::sort(m_bucketList_R.begin(), m_bucketList_R.end(), - [](const structBucket &lhs, const structBucket &rhs) - { - return lhs.constraints < rhs.constraints; - }); - break; - case typeBucket::Translation: - std::sort(m_bucketList_T.begin(), m_bucketList_T.end(), - [](const structBucket &lhs, const structBucket &rhs) - { - return lhs.constraints < rhs.constraints; - }); - break; + return sphericalBucketIndex(normal, m_options.t_azimuth_bins, m_options.t_z_bins, false); +} + +int DNSS::rotationalBucketIndex(const glm::fvec3 &rotational_normal) const +{ + return sphericalBucketIndex(rotational_normal, m_options.r_azimuth_bins, m_options.r_z_bins, true); +} + +int DNSS::targetSampleCount(float sample_rate) const +{ + if (!std::isfinite(sample_rate) || sample_rate <= 0.0f || m_vertices_original.empty()) + { + return 0; } + + const float clamped_rate = std::min(1.0f, sample_rate); + const int count = static_cast(std::ceil(clamped_rate * static_cast(m_vertices_original.size()))); + return std::clamp(count, 0, static_cast(m_vertices_original.size())); } -void DNSS::computeSphericalCoordinate(const glm::fvec3 &normal, float &coordinates_azimuth, float &coordinates_polar) +std::uint32_t DNSS::nextSeed() { - float radian_azimuth = atan2(normal.y, normal.x); - float radian_polar = acos(normal.z); - coordinates_azimuth = radian_azimuth * m_pi_degree / m_pi_radian; - coordinates_polar = radian_polar * m_pi_degree / m_pi_radian; + m_runtime_seed = m_runtime_seed * 1664525u + 1013904223u; + return m_runtime_seed; } diff --git a/NSS.h b/NSS.h index d19bdf5..81a6e15 100644 --- a/NSS.h +++ b/NSS.h @@ -1,154 +1,193 @@ #pragma once -#include -#include +#include #include -#include -#include -#include -#include -#include -#include - -/* - * normal space sampling algorithm -* - * - * original source code: https://github.com/kyzyx/scanalyze/ - *Scanalyze (version 1.0) license information - ------------------------------------------- - - Scanalyze is copyright (c) 2002 the Board of Trustees of The Leland - Stanford Junior University. All rights reserved. - - This software is covered by the Stanford Computer Graphics Laboratory's - General Software License. This license is royalty-free, nonexclusive, - and nontransferable. The terms and conditions of this license are - viewable at this URL: - - http://graphics.stanford.edu/software/license.html - - - The scanalyze software also builds upon a number of 3rd party - software components, some of which may be copyrighted: - - 1. Togl - 2. Tcl/Tk - 3. STL - 4. Template Numerical Toolkit (TNT) - 5. Janne Heikkila's Camera Calibration Toolbox - 6. Graphics Gems - 7. Miscellaneous SGI code - - Further information about each of these software components and - their terms of usage is included below: - - ---------------------------------------------------------------------- - * - **/ + +#if __has_include() +#include +#else +namespace glm +{ +struct fvec3 +{ + float x; + float y; + float z; + + constexpr fvec3() + : x(0.0f), y(0.0f), z(0.0f) + { + } + + constexpr explicit fvec3(float value) + : x(value), y(value), z(value) + { + } + + constexpr fvec3(float x_value, float y_value, float z_value) + : x(x_value), y(y_value), z(z_value) + { + } + + float &operator[](int index) + { + return (&x)[index]; + } + + const float &operator[](int index) const + { + return (&x)[index]; + } + + fvec3 &operator+=(const fvec3 &rhs) + { + x += rhs.x; + y += rhs.y; + z += rhs.z; + return *this; + } + + fvec3 &operator-=(const fvec3 &rhs) + { + x -= rhs.x; + y -= rhs.y; + z -= rhs.z; + return *this; + } + + fvec3 &operator*=(float scalar) + { + x *= scalar; + y *= scalar; + z *= scalar; + return *this; + } + + fvec3 &operator/=(float scalar) + { + x /= scalar; + y /= scalar; + z /= scalar; + return *this; + } +}; + +inline fvec3 operator+(fvec3 lhs, const fvec3 &rhs) +{ + lhs += rhs; + return lhs; +} + +inline fvec3 operator-(fvec3 lhs, const fvec3 &rhs) +{ + lhs -= rhs; + return lhs; +} + +inline fvec3 operator-(const fvec3 &value) +{ + return fvec3(-value.x, -value.y, -value.z); +} + +inline fvec3 operator*(fvec3 lhs, float scalar) +{ + lhs *= scalar; + return lhs; +} + +inline fvec3 operator*(float scalar, fvec3 rhs) +{ + rhs *= scalar; + return rhs; +} + +inline fvec3 operator/(fvec3 lhs, float scalar) +{ + lhs /= scalar; + return lhs; +} +} // namespace glm +#endif + class NSS { public: - /** \brief Function to perform normal space sampling - * \param[in] sample_rate The sampling rate, maximum 1.0. Repeat until the number is filled by multiplying the rate. + struct Options + { + int azimuth_bins = 12; + int z_bins = 6; + std::uint32_t random_seed = 0x12345678u; + bool deterministic = true; + }; + + NSS(); + explicit NSS(const Options &options); + + void setOptions(const Options &options); + const Options &getOptions() const; + + /** \brief Function to perform normal space sampling. + * \param[in] sample_rate The sampling rate in [0, 1]. * \param[out] sampled_vertices Data of sampled points. * \param[out] sampled_normals Data of sampled normals. */ void normalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals); - /** \brief Function to specify the original data for sampling - * \param[in] pointData Point data. - * \param[in] normalData Normal data. + /** \brief Function to specify the original data for sampling. + * \param[in] original_vertices Point data. + * \param[in] original_normals Normal data. */ - void setInputCloud(const std::vector original_vertices, const std::vector original_normals); + void setInputCloud(const std::vector &original_vertices, const std::vector &original_normals); private: - /** \brief Create indices based on the normal values and then store the index values in buckets. - * \param[in] const std::vector &n A series of normal values. - * \param[out] std::vector< std::vector> Buckets containing indices. - */ - void sort_into_buckets(const std::vector &n, std::vector> &normbuckets); + int normalToBucketIndex(const glm::fvec3 &normal) const; + int targetSampleCount(float sample_rate) const; + std::uint32_t nextSeed(); private: + Options m_options; + std::uint32_t m_runtime_seed = m_options.random_seed; std::vector m_original_vertices; std::vector m_original_normals; }; -/* - * Dual Normal Space Sampling - * Kwok, Tsz-Ho. "DNSS: Dual-Normal-Space Sampling for 3-D ICP Registration." - * IEEE Transactions on Automation Science and Engineering (2018). - * Sampling including rotation information in addition to the original NSS. - * Adding Rotational Information to NSS - * Author: seweon.jeon - * Date: 2018.04.09 - **/ - class DNSS { public: - void setInputCloud(const std::vector original_vertices, const std::vector original_normals); - void dualNormalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals); - -private: - struct structBucket + struct Options { - int bucketIndex = 0; - float constraints = 0; + int t_azimuth_bins = 12; + int t_z_bins = 6; + int r_azimuth_bins = 6; + int r_z_bins = 6; + float theta_for_return_radians = 0.78539816339f; // pi / 4 + bool initialize_rotation_buckets = true; + bool enable_cuda = false; + std::uint32_t random_seed = 0x9e3779b9u; + bool deterministic = true; }; - enum class typeBucket : int - { - Rotation = 0, - Translation, - }; + DNSS(); + explicit DNSS(const Options &options); - void computeRotationalReturn(); - void initBucketB(); - void sortIntoBucket(); - void computeRotationalNormals(); - void computeCentroidandNormalize(); - - int pickPoint(); - - void updateBucketOrder(typeBucket &bType); - /** - * \brief Calculates azimuth and polar values based on Normal (nx, ny, nz) - * \note The naming of azimuth and polar angles differ in the paper and ISO rule, so this follows the ISO rule - * ISO rule: theta - polar angle (0~ pi), phi - azimuth angle (0~2pi) - * Paper: theta - azimuth angle (0~2pi), phi - polar angle (0~pi) - * pi = 180; 2pi = 360; - * \param[in] glm::fvec3 Normal value. - * \param[out] float coordinates_azimuth. - * \param[out] float coordinates_polar. - */ - void computeSphericalCoordinate(const glm::fvec3 &normal, float &coordinates_azimuth, float &coordinates_polar); + void setOptions(const Options &options); + const Options &getOptions() const; + + void setUseCuda(bool enable); + bool getUseCuda() const; + + void setInputCloud(const std::vector &original_vertices, const std::vector &original_normals); + void dualNormalSpaceSampling(const float &sample_rate, std::vector &sampled_vertices, std::vector &sampled_normals); + +private: + int translationalBucketIndex(const glm::fvec3 &normal) const; + int rotationalBucketIndex(const glm::fvec3 &rotational_normal) const; + int targetSampleCount(float sample_rate) const; + std::uint32_t nextSeed(); private: - std::vector m_bucketList_R; - std::vector m_bucketList_T; - std::vector> m_vecForBIdx; - std::vector>> m_bucketRotation; - std::vector> m_bucketTranslation; + Options m_options; + std::uint32_t m_runtime_seed = m_options.random_seed; std::vector m_vertices_original; std::vector m_normals_original; - std::vector m_vertices_normalized; - std::vector m_normals_rotational; - std::vector m_rotationalReturns; - - /* - * \note The naming of azimuth and polar angles differ in the paper and ISO rule, so this follows the ISO rule - * ISO rule: theta - polar angle (0~180), phi - azimuth angle (0~360) - * Paper: theta - azimuth angle (0~360), phi - polar angle (0~180) - */ - static const int m_bucketsizeT_azimuth = 12; - static const int m_bucketsizeT_polar = 6; - static const int m_bucketsizeR_azimuth = 6; - static const int m_bucketsizeR_polar = 6; - - const float m_pi_degree = 180.0f; - const float m_pi_radian = 3.141592f; - const float m_pi2_degree = 360.0f; - const float m_thetaForSort = m_pi_degree / 6; - const float m_thetaForReturn = m_pi_degree / 4; -}; \ No newline at end of file +}; diff --git a/README.md b/README.md index de38a01..4da9f82 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,55 @@ -# DNSS -Normal Space Sampling and Dual Normal Space Sampling +# DNSS / NSS -An Implementation of DNSS and NSS. +Clean C++ implementation of: -I am not author of original paper. -I just implemented it. +- NSS: Normal-Space Sampling for ICP point selection. +- DNSS: Dual-Normal-Space Sampling from + Tsz-Ho Kwok, "DNSS: Dual-Normal-Space Sampling for 3-D ICP Registration," IEEE T-ASE, 2018. -NSS: Normal Space Sampling -DNSS: Dual Normal Space Sampling +## What This Version Fixes -parameters are configured same as original paper. +- Rewrites legacy code into readable, modular C++17. +- Adds missing method implementations (`setInputCloud`) and input validation. +- Fixes rotational-return math to use radians consistently. +- Uses efficient lazy removal (active-mask + heap cleanup) instead of repeated `erase` on vectors. +- Adds optional CUDA acceleration for DNSS rotational feature computation. -Kwok, Tsz-Ho. "DNSS: Dual-Normal-Space Sampling for 3-D ICP Registration." -IEEE Transactions on Automation Science and Engineering (2018). +## Algorithm Notes + +DNSS follows the paper's main idea: + +1. Center and scale points by `Lmax`. +2. Build translational normal space from original normals. +3. Build rotational normal space from `nr = p x n`. +4. Compute rotational return and use it as the rotational-bucket constraint increment. +5. Iteratively pick points from the currently least-constrained bucket across both spaces. + +Default bucket settings: + +- Translational space: `12 x 6` +- Rotational space: `6 x 6` +- Return angle `theta`: `pi / 4` + +## Build + +CPU only: + +```bash +cmake -S . -B build +cmake --build build +``` + +With CUDA (if CUDA toolkit is installed): + +```bash +cmake -S . -B build -DDNSS_ENABLE_CUDA=ON +cmake --build build +``` + +When CUDA is enabled, call `DNSS::setUseCuda(true)` to request GPU acceleration. If GPU execution fails at runtime, the implementation falls back to the CPU path. + +## References + +- DNSS paper (2018): https://ieeexplore.ieee.org/document/8375750 +- DNSS open-access repository page: https://spectrum.library.concordia.ca/id/eprint/984398/ +- NSS/ICP variant context: Rusinkiewicz and Levoy, "Efficient Variants of the ICP Algorithm," 3DIM 2001. diff --git a/dnss_cuda.cu b/dnss_cuda.cu new file mode 100644 index 0000000..ada8572 --- /dev/null +++ b/dnss_cuda.cu @@ -0,0 +1,217 @@ +#include +#include + +#include + +#include "NSS.h" + +namespace +{ +constexpr float kEpsilon = 1e-7f; + +__device__ float clampFloat(float value, float min_value, float max_value) +{ + return fmaxf(min_value, fminf(value, max_value)); +} + +__global__ void computeRotationalFeaturesKernel( + const float *normalized_vertices, + const float *normals, + float theta, + float *rotational_normals, + float *rotational_returns, + int point_count) +{ + const int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= point_count) + { + return; + } + + const int base = 3 * index; + const float px = normalized_vertices[base + 0]; + const float py = normalized_vertices[base + 1]; + const float pz = normalized_vertices[base + 2]; + const float nx = normals[base + 0]; + const float ny = normals[base + 1]; + const float nz = normals[base + 2]; + + // Rotational normal nr = p x n. + rotational_normals[base + 0] = py * nz - pz * ny; + rotational_normals[base + 1] = pz * nx - px * nz; + rotational_normals[base + 2] = px * ny - py * nx; + + const float point_norm = sqrtf(px * px + py * py + pz * pz); + if (!(isfinite(point_norm)) || point_norm <= kEpsilon) + { + rotational_returns[index] = 0.0f; + return; + } + + float normal_norm = sqrtf(nx * nx + ny * ny + nz * nz); + if (!(isfinite(normal_norm)) || normal_norm <= kEpsilon) + { + normal_norm = 1.0f; + } + + const float pdx = px / point_norm; + const float pdy = py / point_norm; + const float pdz = pz / point_norm; + const float ndx = nx / normal_norm; + const float ndy = ny / normal_norm; + const float ndz = nz / normal_norm; + + const float dot_pn = clampFloat(pdx * ndx + pdy * ndy + pdz * ndz, -1.0f, 1.0f); + const float beta = acosf(dot_pn); + const float pp = 2.0f * point_norm * sinf(theta * 0.5f); + const float sin_beta = sinf(beta); + const float cos_beta = cosf(beta); + + const float pq_positive = pp * cosf(beta - theta * 0.5f); + const float pq_negative = pp * cosf(-beta - theta * 0.5f); + + const float denominator_positive = fmaxf(kEpsilon, point_norm - pq_positive * cos_beta); + const float denominator_negative = fmaxf(kEpsilon, point_norm - pq_negative * cos_beta); + + const float atan_positive = atanf((pq_positive * sin_beta) / denominator_positive); + const float atan_negative = atanf((pq_negative * (-sin_beta)) / denominator_negative); + + const float gamma_positive = theta - atan_positive; + const float gamma_negative = theta - atan_negative; + + const float rr_positive = point_norm * gamma_positive / theta; + const float rr_negative = point_norm * gamma_negative / theta; + + const float rr = fmaxf(rr_positive, rr_negative); + if (!isfinite(rr) || rr < 0.0f) + { + rotational_returns[index] = 0.0f; + } + else + { + rotational_returns[index] = rr; + } +} + +bool isCudaSuccess(cudaError_t status) +{ + return status == cudaSuccess; +} + +} // namespace + +bool DNSSComputeRotationalFeaturesCuda( + const std::vector &normalized_vertices, + const std::vector &normals, + float theta_for_return_radians, + std::vector *rotational_normals, + std::vector *rotational_returns) +{ + if (rotational_normals == nullptr || rotational_returns == nullptr) + { + return false; + } + if (normalized_vertices.size() != normals.size()) + { + return false; + } + + const int point_count = static_cast(normalized_vertices.size()); + rotational_normals->assign(normalized_vertices.size(), glm::fvec3(0.0f)); + rotational_returns->assign(normalized_vertices.size(), 0.0f); + + if (point_count == 0) + { + return true; + } + + std::vector vertices_flat(static_cast(point_count) * 3u); + std::vector normals_flat(static_cast(point_count) * 3u); + for (int i = 0; i < point_count; ++i) + { + vertices_flat[3 * i + 0] = normalized_vertices[static_cast(i)].x; + vertices_flat[3 * i + 1] = normalized_vertices[static_cast(i)].y; + vertices_flat[3 * i + 2] = normalized_vertices[static_cast(i)].z; + normals_flat[3 * i + 0] = normals[static_cast(i)].x; + normals_flat[3 * i + 1] = normals[static_cast(i)].y; + normals_flat[3 * i + 2] = normals[static_cast(i)].z; + } + + float *d_vertices = nullptr; + float *d_normals = nullptr; + float *d_rot_normals = nullptr; + float *d_rot_returns = nullptr; + + const std::size_t vector_bytes = vertices_flat.size() * sizeof(float); + const std::size_t return_bytes = rotational_returns->size() * sizeof(float); + + if (!isCudaSuccess(cudaMalloc(&d_vertices, vector_bytes)) || + !isCudaSuccess(cudaMalloc(&d_normals, vector_bytes)) || + !isCudaSuccess(cudaMalloc(&d_rot_normals, vector_bytes)) || + !isCudaSuccess(cudaMalloc(&d_rot_returns, return_bytes))) + { + cudaFree(d_vertices); + cudaFree(d_normals); + cudaFree(d_rot_normals); + cudaFree(d_rot_returns); + return false; + } + + if (!isCudaSuccess(cudaMemcpy(d_vertices, vertices_flat.data(), vector_bytes, cudaMemcpyHostToDevice)) || + !isCudaSuccess(cudaMemcpy(d_normals, normals_flat.data(), vector_bytes, cudaMemcpyHostToDevice))) + { + cudaFree(d_vertices); + cudaFree(d_normals); + cudaFree(d_rot_normals); + cudaFree(d_rot_returns); + return false; + } + + const int threads = 256; + const int blocks = (point_count + threads - 1) / threads; + computeRotationalFeaturesKernel<<>>( + d_vertices, + d_normals, + theta_for_return_radians, + d_rot_normals, + d_rot_returns, + point_count); + + if (!isCudaSuccess(cudaGetLastError()) || !isCudaSuccess(cudaDeviceSynchronize())) + { + cudaFree(d_vertices); + cudaFree(d_normals); + cudaFree(d_rot_normals); + cudaFree(d_rot_returns); + return false; + } + + std::vector rot_normals_flat(vertices_flat.size()); + std::vector rot_returns_flat(rotational_returns->size()); + + if (!isCudaSuccess(cudaMemcpy(rot_normals_flat.data(), d_rot_normals, vector_bytes, cudaMemcpyDeviceToHost)) || + !isCudaSuccess(cudaMemcpy(rot_returns_flat.data(), d_rot_returns, return_bytes, cudaMemcpyDeviceToHost))) + { + cudaFree(d_vertices); + cudaFree(d_normals); + cudaFree(d_rot_normals); + cudaFree(d_rot_returns); + return false; + } + + cudaFree(d_vertices); + cudaFree(d_normals); + cudaFree(d_rot_normals); + cudaFree(d_rot_returns); + + for (int i = 0; i < point_count; ++i) + { + (*rotational_normals)[static_cast(i)] = glm::fvec3( + rot_normals_flat[3 * i + 0], + rot_normals_flat[3 * i + 1], + rot_normals_flat[3 * i + 2]); + (*rotational_returns)[static_cast(i)] = rot_returns_flat[static_cast(i)]; + } + + return true; +} From 4bb5106961b499d59513bdd7f18b5764f0c534be Mon Sep 17 00:00:00 2001 From: "sewon.jeon" Date: Sun, 8 Feb 2026 22:21:48 +0900 Subject: [PATCH 2/2] ai code review --- .github/workflows/pr-agent.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-agent.yml b/.github/workflows/pr-agent.yml index 7ee9478..5fb7c3f 100644 --- a/.github/workflows/pr-agent.yml +++ b/.github/workflows/pr-agent.yml @@ -30,14 +30,14 @@ jobs: CONFIG__CUSTOM_MODEL_MAX_TOKENS: "32768" CONFIG__RESPONSE_LANGUAGE: "ko-KR" - PR_REVIEWER__NUM_CODE_SUGGESTIONS: "3" + PR_REVIEWER__NUM_CODE_SUGGESTIONS: "5" PR_REVIEWER__REQUIRE_SECURITY_REVIEW: "true" PR_REVIEWER__EXTRA_INSTRUCTIONS: "버그, 성능 문제, 메모리 누수를 분석해주세요." PR_CODE_SUGGESTIONS__NUM_CODE_SUGGESTIONS: "5" PR_CODE_SUGGESTIONS__EXTRA_INSTRUCTIONS: "구체적인 코드 수정안을 제시해주세요." - PR_DESCRIPTION__PUBLISH_LABELS: "true" + PR_DESCRIPTION__ADD_ORIGINAL_USER_DESCRIPTION: "true" run: |