10 changes: 9 additions & 1 deletion ci/run_ctests.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail
@@ -26,3 +26,11 @@ for gt in "${GTEST_DIR}"/*_TEST; do
echo "Running gtest ${test_name}"
"${gt}" "$@"
done

+# Run C_API_TEST with CPU memory for local solves (excluding time limit tests)
+if [ -x "${GTEST_DIR}/C_API_TEST" ]; then
Contributor:
is this intentional?

Contributor Author:
Yes, the idea is to test the CPU memory path in a roundtrip. The easiest way to do that is, in test mode, to actually copy the CPU data to the GPU, run a real solve, and then copy the solution back. This effectively tests the conversions that will happen in the gRPC server after/before serialization, as well as the entire path through the API. We leave off the timeout tests because they're not really necessary for this, and at least locally for me they fail.

echo "Running gtest C_API_TEST with CUOPT_USE_CPU_MEM_FOR_LOCAL"
CUOPT_USE_CPU_MEM_FOR_LOCAL=1 "${GTEST_DIR}/C_API_TEST" --gtest_filter=-c_api/TimeLimitTestFixture.* "$@"
else
echo "Skipping C_API_TEST with CUOPT_USE_CPU_MEM_FOR_LOCAL (binary not found)"
fi
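A minimal sketch of the roundtrip described in the review thread above, assuming the interfaces added in this PR: `cpu_optimization_problem_t` and `solve_lp` appear in this PR's diff, while the function itself and the templated settings parameter are illustrative stand-ins, not actual project code.

```cpp
// Sketch: what CUOPT_USE_CPU_MEM_FOR_LOCAL=1 exercises end to end.
// cpu_optimization_problem_t and solve_lp come from this PR's diff; the
// settings type is templated because its concrete name is not shown here.
#include <cuopt/linear_programming/optimization_problem_interface.hpp>
#include <cuopt/linear_programming/solve.hpp>

template <typename SettingsT>
auto roundtrip_solve_sketch(
  cuopt::linear_programming::cpu_optimization_problem_t<int, double>& cpu_problem,
  SettingsT& lp_settings)
{
  // The problem lives in host memory, as it would in the gRPC server after
  // deserialization. With CUOPT_USE_CPU_MEM_FOR_LOCAL=1, the local solve is
  // expected to copy it to the GPU, run a real solve, and copy the solution
  // back, so the caller receives a host-memory solution: the same conversions
  // the gRPC server performs around serialization.
  return cuopt::linear_programming::solve_lp(&cpu_problem, lp_settings);
}
```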
64 changes: 44 additions & 20 deletions cpp/cuopt_cli.cpp
@@ -1,12 +1,15 @@
/* clang-format off */
/*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */

+#include <cuopt/linear_programming/backend_selection.hpp>
#include <cuopt/linear_programming/mip/solver_settings.hpp>
#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_utils.hpp>
#include <cuopt/linear_programming/solve.hpp>
#include <mps_parser/parser.hpp>
#include <utilities/logger.hpp>
@@ -89,7 +92,6 @@ int run_single_file(const std::string& file_path,
bool solve_relaxation,
const std::map<std::string, std::string>& settings_strings)
{
-  const raft::handle_t handle_{};
cuopt::linear_programming::solver_settings_t<int, double> settings;

try {
@@ -122,13 +124,31 @@
return -1;
}

-  auto op_problem =
-    cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model);
+  // Determine memory backend and create problem using interface
+  // Create handle only for GPU memory backend (avoid CUDA init on CPU-only hosts)
+  auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
+  std::unique_ptr<raft::handle_t> handle_ptr;
+  std::unique_ptr<cuopt::linear_programming::optimization_problem_interface_t<int, double>>
+    problem_interface;
+
+  if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
+    handle_ptr = std::make_unique<raft::handle_t>();
+    problem_interface =
+      std::make_unique<cuopt::linear_programming::gpu_optimization_problem_t<int, double>>(
+        handle_ptr.get());
+  } else {
+    problem_interface =
+      std::make_unique<cuopt::linear_programming::cpu_optimization_problem_t<int, double>>(nullptr);
+  }
+
+  // Populate the problem from MPS data model
+  cuopt::linear_programming::populate_from_mps_data_model(problem_interface.get(), mps_data_model);

-  const bool is_mip =
-    (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP ||
-     op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP) &&
-    !solve_relaxation;
+  const bool is_mip = (problem_interface->get_problem_category() ==
+                         cuopt::linear_programming::problem_category_t::MIP ||
+                       problem_interface->get_problem_category() ==
+                         cuopt::linear_programming::problem_category_t::IP) &&
+                      !solve_relaxation;

try {
auto initial_solution =
@@ -157,10 +177,10 @@
try {
if (is_mip) {
auto& mip_settings = settings.get_mip_settings();
-      auto solution = cuopt::linear_programming::solve_mip(op_problem, mip_settings);
+      auto solution = cuopt::linear_programming::solve_mip(problem_interface.get(), mip_settings);
} else {
auto& lp_settings = settings.get_pdlp_settings();
-      auto solution = cuopt::linear_programming::solve_lp(op_problem, lp_settings);
+      auto solution = cuopt::linear_programming::solve_lp(problem_interface.get(), lp_settings);
}
} catch (const std::exception& e) {
CUOPT_LOG_ERROR("Error: %s", e.what());
@@ -334,19 +354,23 @@ int main(int argc, char* argv[])
const auto initial_solution_file = program.get<std::string>("--initial-solution");
const auto solve_relaxation = program.get<bool>("--relaxation");

-  // All arguments are parsed as string, default values are parsed as int if unused.
-  const auto num_gpus = program.is_used("--num-gpus")
-                          ? std::stoi(program.get<std::string>("--num-gpus"))
-                          : program.get<int>("--num-gpus");

+  // Only initialize CUDA resources if using GPU memory backend (not remote execution)
+  auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
std::vector<std::shared_ptr<rmm::mr::device_memory_resource>> memory_resources;

-  for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
-    cudaSetDevice(i);
-    memory_resources.push_back(make_async());
-    rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
+  if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
+    // All arguments are parsed as string, default values are parsed as int if unused.
+    const auto num_gpus = program.is_used("--num-gpus")
+                            ? std::stoi(program.get<std::string>("--num-gpus"))
+                            : program.get<int>("--num-gpus");
+
+    for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
+      RAFT_CUDA_TRY(cudaSetDevice(i));
+      memory_resources.push_back(make_async());
+      rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
+    }
+    RAFT_CUDA_TRY(cudaSetDevice(0));
+  }
-  cudaSetDevice(0);

return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings);
}
69 changes: 69 additions & 0 deletions cpp/include/cuopt/linear_programming/backend_selection.hpp
@@ -0,0 +1,69 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */

#pragma once

namespace cuopt::linear_programming {

/**
* @brief Enum for execution mode (local vs remote solve)
*/
enum class execution_mode_t {
LOCAL, ///< Solve locally on this machine
REMOTE ///< Solve remotely via gRPC
};

/**
* @brief Enum for memory backend type (GPU vs CPU memory)
*/
enum class memory_backend_t {
GPU, ///< Use GPU memory (device memory via RMM)
CPU ///< Use CPU memory (host memory)
};

/**
* @brief Check if remote execution is enabled via environment variables
* @return true if both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are set
*/
bool is_remote_execution_enabled();

/**
* @brief Determine execution mode based on environment variables
*
* @return execution_mode_t::REMOTE if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are set,
* execution_mode_t::LOCAL otherwise
*/
execution_mode_t get_execution_mode();

/**
* @brief Check if GPU memory should be used for remote execution
* @return true if CUOPT_USE_GPU_MEM_FOR_REMOTE is set to "true" or "1" (case-insensitive)
*/
bool use_gpu_memory_for_remote();
Contributor:
will there be cases for using GPU memory in the remote?

Contributor Author:
I left this path here in case we invent some features in the future where you may want to

  1. do GPU manipulation of a problem or solution on the local GPU host (maybe some post-processing of an LP in a chain of LPs before you submit another one, or pre-processing after you've built the problem but before you solve? We don't have those features today, but maybe we will some day), and

  2. solve on a remote host, because maybe you have an L40 locally that is good for pre/post-processing but you want to solve the problem on a GB200.

We could remove it; it's strictly future-proofing but easy to support. What do you think?

Contributor:
Let's remove this for now. We can add it back easily, as you said.


/**
* @brief Check if CPU memory should be used for local execution (test mode)
*
* This is intended for testing CPU problem/solution structures without remote execution.
* When enabled, local solve will convert CPU problems to GPU, solve, and convert back.
*
* @return true if CUOPT_USE_CPU_MEM_FOR_LOCAL is set to "true" or "1" (case-insensitive)
*/
bool use_cpu_memory_for_local();

/**
* @brief Determine which memory backend to use based on execution mode
*
* Logic:
* - LOCAL execution -> GPU memory by default, CPU if CUOPT_USE_CPU_MEM_FOR_LOCAL=true (test mode)
* - REMOTE execution -> CPU memory by default, GPU if CUOPT_USE_GPU_MEM_FOR_REMOTE=true
*
* @return memory_backend_t::GPU or memory_backend_t::CPU
*/
memory_backend_t get_memory_backend_type();

} // namespace cuopt::linear_programming
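A minimal sketch of how the selection logic documented in this header could look. Only the function names, environment variables, and GPU/CPU defaults come from the doc comments above; the `env_flag` helper and its exact parsing are assumptions, not the actual implementation.

```cpp
#include <cuopt/linear_programming/backend_selection.hpp>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <string>

namespace cuopt::linear_programming {

// Assumed helper: true if the variable is set to "true" or "1"
// (case-insensitive), matching the semantics documented above.
static bool env_flag(const char* name)
{
  const char* value = std::getenv(name);
  if (value == nullptr) { return false; }
  std::string v{value};
  std::transform(
    v.begin(), v.end(), v.begin(), [](unsigned char c) { return std::tolower(c); });
  return v == "true" || v == "1";
}

bool is_remote_execution_enabled()
{
  // Remote execution requires both host and port to be set.
  return std::getenv("CUOPT_REMOTE_HOST") != nullptr &&
         std::getenv("CUOPT_REMOTE_PORT") != nullptr;
}

memory_backend_t get_memory_backend_type()
{
  if (is_remote_execution_enabled()) {
    // REMOTE -> CPU memory by default, GPU if CUOPT_USE_GPU_MEM_FOR_REMOTE is set.
    return env_flag("CUOPT_USE_GPU_MEM_FOR_REMOTE") ? memory_backend_t::GPU
                                                    : memory_backend_t::CPU;
  }
  // LOCAL -> GPU memory by default, CPU in CUOPT_USE_CPU_MEM_FOR_LOCAL test mode.
  return env_flag("CUOPT_USE_CPU_MEM_FOR_LOCAL") ? memory_backend_t::CPU
                                                 : memory_backend_t::GPU;
}

}  // namespace cuopt::linear_programming
```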