From c5ecef321a3655758a7f5470496af86334f1cae9 Mon Sep 17 00:00:00 2001 From: Sergei Isakov <54642992+sergeisakov@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:55:35 +0100 Subject: [PATCH 1/3] Add options for cuStateVecEx. --- apps/qsim_base_custatevecex.cu | 22 +- docs/cirq_interface.md | 15 +- lib/multiprocess_custatevecex.h | 13 +- lib/vectorspace_custatevecex.h | 4 + pybind_interface/avx2/pybind_main_avx2.cpp | 9 +- .../avx512/pybind_main_avx512.cpp | 9 +- pybind_interface/basic/pybind_main_basic.cpp | 9 +- pybind_interface/cuda/pybind_main_cuda.cpp | 10 +- pybind_interface/custatevec/CMakeLists.txt | 4 +- .../custatevec/pybind_main_custatevec.cpp | 6 +- pybind_interface/custatevecex/CMakeLists.txt | 4 +- .../custatevecex/pybind_main_custatevecex.cpp | 55 ++- pybind_interface/hip/pybind_main_hip.cpp | 9 +- pybind_interface/pybind_main.cpp | 371 +++++++----------- pybind_interface/pybind_main.h | 11 + pybind_interface/sse/pybind_main_sse.cpp | 9 +- qsimcirq/qsim_simulator.py | 14 +- tests/hybrid_custatevecex_test.cu | 2 +- tests/qtrajectory_custatevecex_test.cu | 2 +- tests/run_custatevecex_test.cu | 2 +- tests/simulator_custatevecex_test.cu | 2 +- tests/statespace_custatevecex_test.cu | 2 +- 22 files changed, 288 insertions(+), 296 deletions(-) diff --git a/apps/qsim_base_custatevecex.cu b/apps/qsim_base_custatevecex.cu index 99ce1a283..d7d1180a3 100644 --- a/apps/qsim_base_custatevecex.cu +++ b/apps/qsim_base_custatevecex.cu @@ -32,18 +32,19 @@ struct Options { std::string circuit_file; unsigned maxtime = std::numeric_limits::max(); unsigned seed = 1; + unsigned lbuf = 30; unsigned verbosity = 0; }; Options GetOptions(int argc, char* argv[]) { - constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime " - "-s seed -v verbosity\n"; + constexpr char usage[] = "usage:\n ./qsim_base_custatevecex.x -c circuit " + "-d maxtime -s seed -l lbuf -v verbosity\n"; Options opt; int k; - while ((k = getopt(argc, argv, "c:d:s:v:")) != -1) { + while ((k = 
getopt(argc, argv, "c:d:s:l:v:")) != -1) { switch (k) { case 'c': opt.circuit_file = optarg; @@ -54,6 +55,9 @@ Options GetOptions(int argc, char* argv[]) { case 's': opt.seed = std::atoi(optarg); break; + case 'l': + opt.lbuf = std::atoi(optarg); + break; case 'v': opt.verbosity = std::atoi(optarg); break; @@ -112,8 +116,14 @@ int main(int argc, char* argv[]) { using Simulator = qsim::SimulatorCuStateVecEx; using StateSpace = Simulator::StateSpace; - explicit Factory(unsigned verbosity = 0) : verbosity(verbosity) { - mp.initialize(); + explicit Factory(uint64_t transfer_buffer_size, unsigned verbosity = 0) + : verbosity(verbosity) { + MultiProcessCuStateVecEx::Parameter param = {transfer_buffer_size}; + mp.initialize(param); + + if (verbosity > 2 && mp.initialized()) { + qsim::IO::messagef("# transfer_buf_size=%lu\n", transfer_buffer_size); + } } StateSpace CreateStateSpace() const { @@ -136,7 +146,7 @@ int main(int argc, char* argv[]) { using State = StateSpace::State; using Runner = CuStateVecExRunner; - Factory factory(opt.verbosity); + Factory factory(uint64_t{1} << opt.lbuf, opt.verbosity); StateSpace state_space = factory.CreateStateSpace(); State state = state_space.Create(circuit.num_qubits); diff --git a/docs/cirq_interface.md b/docs/cirq_interface.md index 5dd1ddb4f..7448d251f 100644 --- a/docs/cirq_interface.md +++ b/docs/cirq_interface.md @@ -183,16 +183,17 @@ To compile with the NVIDIA cuStateVec library (v1.0.0 or higher is required), set the environmment variable `CUQUANTUM_ROOT` to the path to the cuStateVec library. -`QSimOptions` provides five parameters to configure GPU execution. `use_gpu` +`QSimOptions` provides six parameters to configure GPU execution. `use_gpu` is required to enable GPU execution: * `use_gpu`: if True, use GPU instead of CPU for simulation. * `gpu_mode`: use CUDA if set to 0 (default value), use the NVIDIA cuStateVec if set to 1 or use the NVIDIA cuStateVecEx library if set to any other value. 
In the case of the NVIDIA cuStateVecEx library, simulations can be performed -in multi-device / multi-node environments. +in multi-device / multi-node environments. A CUDA-aware MPI library is required +for multi-node runs. Currently, only Open MPI is supported. -If `use_gpu` is set and `gpu_mode` is set to 0, the remaining parameters can +If `use_gpu` is set and `gpu_mode` is set to 0, two parameters can optionally be set to fine-tune StateSpace performance for a specific device. In most cases, the default values provide good performance. * `gpu_state_threads`: number of threads per CUDA block to use for the GPU @@ -200,3 +201,11 @@ StateSpace. This must be a power of 2 in the range [32, 1024]. * `gpu_data_blocks`: number of data blocks to use for the GPU StateSpace. Below 16 data blocks, performance is noticeably reduced. +If `use_gpu` is set and `gpu_mode` is set to 2 or greater (cuStateVecEx), two +parameters can be set to adjust the transfer buffer size for MPI communication +and to select the network type. +* `gpu_cusvex_log_buf_size`: log2 of the transfer buffer size. Default value is 30, +i.e. the buffer size is 2^30 bytes. +* `gpu_cusvex_network_type`: device network type for multi-device runs: +0=Switch (default), 1=FullMesh; or layered network type for multi-process runs: +0=SuperPOD (default), 1=GB200NVL, 2=SwitchTree, 3=Communicator. 
diff --git a/lib/multiprocess_custatevecex.h b/lib/multiprocess_custatevecex.h index 9c4a13bb1..1ffcd0332 100644 --- a/lib/multiprocess_custatevecex.h +++ b/lib/multiprocess_custatevecex.h @@ -39,12 +39,11 @@ struct MultiProcessCuStateVecEx { }; struct Parameter { - uint64_t transfer_buffer_size = 16777216; + uint64_t transfer_buffer_size = uint64_t{1} << 30; NetworkType network_type = kSuperPod; }; - MultiProcessCuStateVecEx(Parameter param = Parameter{16777216, kSuperPod}) - : param_(param), communicator_(nullptr), initialized_(false) {} + MultiProcessCuStateVecEx() : communicator_(nullptr), initialized_(false) {} ~MultiProcessCuStateVecEx() { if (communicator_) { @@ -67,11 +66,17 @@ struct MultiProcessCuStateVecEx { return rank_; } + static bool valid_network_type(unsigned network_type) { + return network_type < 4; + } + bool initialized() const { return initialized_; } - void initialize() { + void initialize(Parameter param) { + param_ = param; + int argc = 0; char** argv = nullptr; diff --git a/lib/vectorspace_custatevecex.h b/lib/vectorspace_custatevecex.h index 3fa26a931..c7b17455a 100644 --- a/lib/vectorspace_custatevecex.h +++ b/lib/vectorspace_custatevecex.h @@ -480,6 +480,10 @@ class VectorSpaceCuStateVecEx { return vector.get() == nullptr; } + static bool ValidDeviceNetworkType(unsigned network_type) { + return network_type < 2; + } + bool Copy(const Vector& src, Vector& dest) const { if (src.num_qubits() != dest.num_qubits()) { return false; diff --git a/pybind_interface/avx2/pybind_main_avx2.cpp b/pybind_interface/avx2/pybind_main_avx2.cpp index a70f1b0f2..193ed97e3 100644 --- a/pybind_interface/avx2/pybind_main_avx2.cpp +++ b/pybind_interface/avx2/pybind_main_avx2.cpp @@ -27,12 +27,9 @@ namespace qsim { using Simulator = SimulatorAVX; struct Factory { - // num_state_threads and num_dblocks are unused, but kept for consistency - // with the GPU Factory. 
- Factory( - unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks) : num_threads(num_sim_threads) {} + explicit Factory(const py::dict& options) { + num_threads = ParseOptions(options, "t\0"); + } using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; diff --git a/pybind_interface/avx512/pybind_main_avx512.cpp b/pybind_interface/avx512/pybind_main_avx512.cpp index 548bf40bf..b730a0d6d 100644 --- a/pybind_interface/avx512/pybind_main_avx512.cpp +++ b/pybind_interface/avx512/pybind_main_avx512.cpp @@ -27,12 +27,9 @@ namespace qsim { using Simulator = SimulatorAVX512; struct Factory { - // num_state_threads and num_dblocks are unused, but kept for consistency - // with the GPU Factory. - Factory( - unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks) : num_threads(num_sim_threads) {} + explicit Factory(const py::dict& options) { + num_threads = ParseOptions(options, "t\0"); + } using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; diff --git a/pybind_interface/basic/pybind_main_basic.cpp b/pybind_interface/basic/pybind_main_basic.cpp index 9b37191b5..6db3f5a5e 100644 --- a/pybind_interface/basic/pybind_main_basic.cpp +++ b/pybind_interface/basic/pybind_main_basic.cpp @@ -27,12 +27,9 @@ namespace qsim { using Simulator = SimulatorBasic; struct Factory { - // num_state_threads and num_dblocks are unused, but kept for consistency - // with the GPU Factory. 
- Factory( - unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks) : num_threads(num_sim_threads) {} + explicit Factory(const py::dict& options) { + num_threads = ParseOptions(options, "t\0"); + } using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; diff --git a/pybind_interface/cuda/pybind_main_cuda.cpp b/pybind_interface/cuda/pybind_main_cuda.cpp index df9a293ee..d399de4c8 100644 --- a/pybind_interface/cuda/pybind_main_cuda.cpp +++ b/pybind_interface/cuda/pybind_main_cuda.cpp @@ -24,13 +24,9 @@ namespace qsim { using Simulator = SimulatorCUDA; struct Factory { - Factory( - unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks - ) { - ss_params.num_threads = num_state_threads; - ss_params.num_dblocks = num_dblocks; + explicit Factory(const py::dict& options) { + ss_params.num_threads = ParseOptions(options, "gsst\0"); + ss_params.num_dblocks = ParseOptions(options, "gdb\0"); } using Simulator = qsim::Simulator; diff --git a/pybind_interface/custatevec/CMakeLists.txt b/pybind_interface/custatevec/CMakeLists.txt index 34bda5e48..9afecb6bc 100644 --- a/pybind_interface/custatevec/CMakeLists.txt +++ b/pybind_interface/custatevec/CMakeLists.txt @@ -45,12 +45,10 @@ include_directories($ENV{CUQUANTUM_ROOT}/include) link_directories($ENV{CUQUANTUM_ROOT}/lib $ENV{CUQUANTUM_ROOT}/lib64) add_library(qsim_custatevec MODULE pybind_main_custatevec.cpp) -target_link_libraries(qsim_custatevec -lcustatevec -lcublas) - set_target_properties(qsim_custatevec PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" ) set_source_files_properties(pybind_main_custatevec.cpp PROPERTIES LANGUAGE CUDA) -target_link_libraries(qsim_custatevec PRIVATE qsim_openmp_config) +target_link_libraries(qsim_custatevec PRIVATE qsim_openmp_config -lcustatevec -lcublas) diff --git a/pybind_interface/custatevec/pybind_main_custatevec.cpp b/pybind_interface/custatevec/pybind_main_custatevec.cpp index 
b4523c8ff..6e61831cf 100644 --- a/pybind_interface/custatevec/pybind_main_custatevec.cpp +++ b/pybind_interface/custatevec/pybind_main_custatevec.cpp @@ -27,11 +27,7 @@ namespace qsim { using Simulator = SimulatorCuStateVec; struct Factory { - // num_sim_threads, num_state_threads and num_dblocks are unused, but kept - // for consistency with other factories. - Factory(unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks) { + explicit Factory(const py::dict& options) { ErrorCheck(cublasCreate(&cublas_handle)); ErrorCheck(custatevecCreate(&custatevec_handle)); } diff --git a/pybind_interface/custatevecex/CMakeLists.txt b/pybind_interface/custatevecex/CMakeLists.txt index 93d66ac33..0b4d49439 100644 --- a/pybind_interface/custatevecex/CMakeLists.txt +++ b/pybind_interface/custatevecex/CMakeLists.txt @@ -45,12 +45,10 @@ include_directories($ENV{CUQUANTUM_ROOT}/include) link_directories($ENV{CUQUANTUM_ROOT}/lib $ENV{CUQUANTUM_ROOT}/lib64) add_library(qsim_custatevecex MODULE pybind_main_custatevecex.cpp) -target_link_libraries(qsim_custatevecex -lcustatevec -lcublas) - set_target_properties(qsim_custatevecex PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" ) set_source_files_properties(pybind_main_custatevecex.cpp PROPERTIES LANGUAGE CUDA) -target_link_libraries(qsim_custatevecex PRIVATE qsim_openmp_config) +target_link_libraries(qsim_custatevecex PRIVATE qsim_openmp_config -lcustatevec -lcublas) diff --git a/pybind_interface/custatevecex/pybind_main_custatevecex.cpp b/pybind_interface/custatevecex/pybind_main_custatevecex.cpp index c29a608a6..e8b9e7f6d 100644 --- a/pybind_interface/custatevecex/pybind_main_custatevecex.cpp +++ b/pybind_interface/custatevecex/pybind_main_custatevecex.cpp @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include + #include #include "pybind_main_custatevecex.h" @@ -33,19 +35,43 @@ namespace qsim { using Simulator = SimulatorCuStateVecEx; struct Factory { - // num_sim_threads, num_state_threads and num_dblocks are unused, but kept - // for consistency with other factories. - Factory(unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks) { + using Simulator = qsim::Simulator; + using StateSpace = Simulator::StateSpace; + + explicit Factory(const py::dict& options) { + verbosity = ParseOptions(options, "v\0"); + nwt = ParseOptions(options, "gnwt\0"); + + if (!mp.initialized()) { + using MP = qsim::MultiProcessCuStateVecEx; + + if (!mp.valid_network_type(nwt)) { + throw std::invalid_argument("Invalid network type."); + } + + unsigned l = ParseOptions(options, "glbuf\0"); + uint64_t buffer_size = uint64_t{1} << l; + + MP::NetworkType network_type = static_cast(nwt); + + MP::Parameter param; + param.transfer_buffer_size = buffer_size; + param.network_type = network_type; + + mp.initialize(param); + + if (verbosity > 2 && mp.initialized()) { + qsim::IO::messagef("transfer_buf_size=%lu\n", buffer_size); + } + } + if (!mp.initialized()) { - mp.initialize(); + if (!StateSpace::ValidDeviceNetworkType(nwt)) { + throw std::invalid_argument("Invalid device network type."); + } } } - using Simulator = qsim::Simulator; - using StateSpace = Simulator::StateSpace; - using Gate = Cirq::GateCirq; using Runner = CuStateVecExRunner; struct RunnerParameter : public Runner::Parameter { @@ -59,12 +85,21 @@ namespace qsim { }; StateSpace CreateStateSpace() const { - return StateSpace{mp}; + using NetworkType = StateSpace::DeviceNetworkType; + + StateSpace::Parameter param; + param.device_network_type = static_cast(nwt); + param.verbosity = verbosity; + + return StateSpace{mp, param}; } Simulator CreateSimulator() const { return Simulator{}; } + + unsigned verbosity = 0; + unsigned nwt = 0; }; inline void SetFlushToZeroAndDenormalsAreZeros() {} diff --git 
a/pybind_interface/hip/pybind_main_hip.cpp b/pybind_interface/hip/pybind_main_hip.cpp index 98a9e3fcd..726048bcd 100644 --- a/pybind_interface/hip/pybind_main_hip.cpp +++ b/pybind_interface/hip/pybind_main_hip.cpp @@ -24,11 +24,10 @@ namespace qsim { using Simulator = SimulatorCUDA; struct Factory { - Factory( - unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks - ) : ss_params{num_state_threads, num_dblocks} {} + explicit Factory(const py::dict& options) { + ss_params.num_threads = ParseOptions(options, "gsst\0"); + ss_params.num_dblocks = ParseOptions(options, "gdb\0"); + } using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; diff --git a/pybind_interface/pybind_main.cpp b/pybind_interface/pybind_main.cpp index 5f6644469..1efba5484 100644 --- a/pybind_interface/pybind_main.cpp +++ b/pybind_interface/pybind_main.cpp @@ -34,16 +34,6 @@ using namespace qsim; namespace { -template -T parseOptions(const py::dict &options, const char *key) { - if (!options.contains(key)) { - std::string msg = std::string("Argument ") + key + " is not provided.\n"; - throw std::invalid_argument(msg); - } - const auto &value = options[key]; - return value.cast(); -} - Circuit getCircuit(const py::dict &options) { try { return options["c\0"].cast>(); @@ -63,7 +53,7 @@ NoisyCircuit getNoisyCircuit(const py::dict &options) { std::vector getBitstrings(const py::dict &options, int num_qubits) { std::string bitstrings_str; try { - bitstrings_str = parseOptions(options, "i\0"); + bitstrings_str = ParseOptions(options, "i\0"); } catch (const std::invalid_argument &exp) { throw; } @@ -389,40 +379,28 @@ std::vector> qsim_simulate(const py::dict &options) { } }; - bool use_gpu; bool denormals_are_zeros; - unsigned gpu_mode; - unsigned num_sim_threads = 0; - unsigned num_state_threads = 0; - unsigned num_dblocks = 0; RunnerParameter param; + try { - use_gpu = parseOptions(options, "g\0"); - gpu_mode = parseOptions(options, "gmode\0"); - 
denormals_are_zeros = parseOptions(options, "z\0"); - if (use_gpu == 0) { - num_sim_threads = parseOptions(options, "t\0"); - } else if (gpu_mode == 0) { - num_state_threads = parseOptions(options, "gsst\0"); - num_dblocks = parseOptions(options, "gdb\0"); + denormals_are_zeros = ParseOptions(options, "z\0"); + param.max_fused_size = ParseOptions(options, "f\0"); + param.verbosity = ParseOptions(options, "v\0"); + param.seed = ParseOptions(options, "s\0"); + + if (denormals_are_zeros) { + SetFlushToZeroAndDenormalsAreZeros(); + } else { + ClearFlushToZeroAndDenormalsAreZeros(); } - param.max_fused_size = parseOptions(options, "f\0"); - param.verbosity = parseOptions(options, "v\0"); - param.seed = parseOptions(options, "s\0"); + + Factory factory(options); + Runner::Run(param, factory, circuit, measure); } catch (const std::invalid_argument &exp) { IO::errorf("%s", exp.what()); return {}; } - if (denormals_are_zeros) { - SetFlushToZeroAndDenormalsAreZeros(); - } else { - ClearFlushToZeroAndDenormalsAreZeros(); - } - - Runner::Run( - param, Factory(num_sim_threads, num_state_threads, num_dblocks), circuit, - measure); return amplitudes; } @@ -450,54 +428,42 @@ std::vector> qtrajectory_simulate(const py::dict &options) { amplitudes.reserve(bitstrings.size()); NoisyRunnerParameter param; - bool use_gpu; bool denormals_are_zeros; - unsigned gpu_mode; - unsigned num_sim_threads = 0; - unsigned num_state_threads = 0; - unsigned num_dblocks = 0; uint64_t seed; try { - use_gpu = parseOptions(options, "g\0"); - gpu_mode = parseOptions(options, "gmode\0"); - denormals_are_zeros = parseOptions(options, "z\0"); - if (use_gpu == 0) { - num_sim_threads = parseOptions(options, "t\0"); - } else if (gpu_mode == 0) { - num_state_threads = parseOptions(options, "gsst\0"); - num_dblocks = parseOptions(options, "gdb\0"); - } - param.max_fused_size = parseOptions(options, "f\0"); - param.verbosity = parseOptions(options, "v\0"); - seed = parseOptions(options, "s\0"); - } catch (const 
std::invalid_argument &exp) { - IO::errorf("%s", exp.what()); - return {}; - } + denormals_are_zeros = ParseOptions(options, "z\0"); + param.max_fused_size = ParseOptions(options, "f\0"); + param.verbosity = ParseOptions(options, "v\0"); + seed = ParseOptions(options, "s\0"); - Factory factory(num_sim_threads, num_state_threads, num_dblocks); - Simulator simulator = factory.CreateSimulator(); - StateSpace state_space = factory.CreateStateSpace(); + Factory factory(options); + Simulator simulator = factory.CreateSimulator(); + StateSpace state_space = factory.CreateStateSpace(); - auto measure = [&bitstrings, &litudes, &state_space]( - unsigned k, const State &state, NoisyRunner::Stat& stat) { - for (const auto &b : bitstrings) { - amplitudes.push_back(state_space.GetAmpl(state, b)); - } - }; + auto measure = [&bitstrings, &litudes, &state_space]( + unsigned k, const State &state, NoisyRunner::Stat& stat) { + for (const auto &b : bitstrings) { + amplitudes.push_back(state_space.GetAmpl(state, b)); + } + }; - if (denormals_are_zeros) { - SetFlushToZeroAndDenormalsAreZeros(); - } else { - ClearFlushToZeroAndDenormalsAreZeros(); - } + if (denormals_are_zeros) { + SetFlushToZeroAndDenormalsAreZeros(); + } else { + ClearFlushToZeroAndDenormalsAreZeros(); + } - if (!NoisyRunner::RunBatch(param, ncircuit, seed, seed + 1, state_space, - simulator, measure)) { - IO::errorf("qtrajectory simulation of the circuit errored out.\n"); + if (!NoisyRunner::RunBatch(param, ncircuit, seed, seed + 1, state_space, + simulator, measure)) { + IO::errorf("qtrajectory simulation of the circuit errored out.\n"); + return {}; + } + } catch (const std::invalid_argument &exp) { + IO::errorf("%s", exp.what()); return {}; } + return amplitudes; } @@ -665,48 +631,38 @@ class SimulatorHelper { private: SimulatorHelper(const py::dict &options, bool noisy) - : factory(Factory(1, 1, 1)), + : factory(Factory(options)), state(StateSpace::Null()), scratch(StateSpace::Null()) { bool denormals_are_zeros; 
is_valid = false; is_noisy = noisy; + try { if (is_noisy) { ncircuit = getNoisyCircuit(options); num_qubits = ncircuit.num_qubits; - noisy_reps = parseOptions(options, "r\0"); + noisy_reps = ParseOptions(options, "r\0"); } else { circuit = getCircuit(options); num_qubits = circuit.num_qubits; } - use_gpu = parseOptions(options, "g\0"); - gpu_mode = parseOptions(options, "gmode\0"); - denormals_are_zeros = parseOptions(options, "z\0"); - if (use_gpu == 0) { - num_sim_threads = parseOptions(options, "t\0"); - } else if (gpu_mode == 0) { - num_state_threads = parseOptions(options, "gsst\0"); - num_dblocks = parseOptions(options, "gdb\0"); - } - max_fused_size = parseOptions(options, "f\0"); - verbosity = parseOptions(options, "v\0"); - seed = parseOptions(options, "s\0"); - - if (use_gpu == 0 || gpu_mode == 0) { - factory = Factory(num_sim_threads, num_state_threads, num_dblocks); - } + denormals_are_zeros = ParseOptions(options, "z\0"); + max_fused_size = ParseOptions(options, "f\0"); + verbosity = ParseOptions(options, "v\0"); + seed = ParseOptions(options, "s\0"); StateSpace state_space = factory.CreateStateSpace(); state = state_space.Create(num_qubits); - is_valid = true; if (denormals_are_zeros) { SetFlushToZeroAndDenormalsAreZeros(); } else { ClearFlushToZeroAndDenormalsAreZeros(); } + + is_valid = true; } catch (const std::invalid_argument &exp) { // If this triggers, is_valid is false. 
IO::errorf("%s", exp.what()); @@ -848,12 +804,7 @@ class SimulatorHelper { State state; State scratch; - bool use_gpu; - unsigned gpu_mode; unsigned num_qubits; - unsigned num_sim_threads; - unsigned num_state_threads; - unsigned num_dblocks; unsigned noisy_reps; unsigned max_fused_size; unsigned verbosity; @@ -999,53 +950,43 @@ std::vector qsim_sample(const py::dict &options) { using State = StateSpace::State; using MeasurementResult = StateSpace::MeasurementResult; - bool use_gpu; bool denormals_are_zeros; - unsigned gpu_mode; - unsigned num_sim_threads = 0; - unsigned num_state_threads = 0; - unsigned num_dblocks = 0; RunnerParameter param; + + std::vector result_bits; + try { - use_gpu = parseOptions(options, "g\0"); - gpu_mode = parseOptions(options, "gmode\0"); - denormals_are_zeros = parseOptions(options, "z\0"); - if (use_gpu == 0) { - num_sim_threads = parseOptions(options, "t\0"); - } else if (gpu_mode == 0) { - num_state_threads = parseOptions(options, "gsst\0"); - num_dblocks = parseOptions(options, "gdb\0"); - } - param.max_fused_size = parseOptions(options, "f\0"); - param.verbosity = parseOptions(options, "v\0"); - param.seed = parseOptions(options, "s\0"); - } catch (const std::invalid_argument &exp) { - IO::errorf("%s", exp.what()); - return {}; - } + denormals_are_zeros = ParseOptions(options, "z\0"); + param.max_fused_size = ParseOptions(options, "f\0"); + param.verbosity = ParseOptions(options, "v\0"); + param.seed = ParseOptions(options, "s\0"); - std::vector results; - Factory factory(num_sim_threads, num_state_threads, num_dblocks); - StateSpace state_space = factory.CreateStateSpace(); - State state = state_space.Create(circuit.num_qubits); - state_space.SetStateZero(state); + std::vector results; + Factory factory(options); + StateSpace state_space = factory.CreateStateSpace(); + State state = state_space.Create(circuit.num_qubits); + state_space.SetStateZero(state); - if (denormals_are_zeros) { - SetFlushToZeroAndDenormalsAreZeros(); - } 
else { - ClearFlushToZeroAndDenormalsAreZeros(); - } + if (denormals_are_zeros) { + SetFlushToZeroAndDenormalsAreZeros(); + } else { + ClearFlushToZeroAndDenormalsAreZeros(); + } - if (!Runner::Run(param, factory, circuit, state, results)) { - IO::errorf("qsim sampling of the circuit errored out.\n"); + if (!Runner::Run(param, factory, circuit, state, results)) { + IO::errorf("qsim sampling of the circuit errored out.\n"); + return {}; + } + + for (const auto& result : results) { + result_bits.insert(result_bits.end(), result.bitstring.begin(), + result.bitstring.end()); + } + } catch (const std::invalid_argument &exp) { + IO::errorf("%s", exp.what()); return {}; } - std::vector result_bits; - for (const auto& result : results) { - result_bits.insert(result_bits.end(), result.bitstring.begin(), - result.bitstring.end()); - } return result_bits; } @@ -1064,79 +1005,68 @@ std::vector qtrajectory_sample(const py::dict &options) { using StateSpace = Simulator::StateSpace; using State = StateSpace::State; - NoisyRunnerParameter param; - bool use_gpu; bool denormals_are_zeros; - unsigned gpu_mode; - unsigned num_sim_threads = 0; - unsigned num_state_threads = 0; - unsigned num_dblocks = 0; + NoisyRunnerParameter param; uint64_t seed; + std::vector result_bits; + try { - use_gpu = parseOptions(options, "g\0"); - gpu_mode = parseOptions(options, "gmode\0"); - denormals_are_zeros = parseOptions(options, "z\0"); - if (use_gpu == 0) { - num_sim_threads = parseOptions(options, "t\0"); - } else if (gpu_mode == 0) { - num_state_threads = parseOptions(options, "gsst\0"); - num_dblocks = parseOptions(options, "gdb\0"); - } - param.max_fused_size = parseOptions(options, "f\0"); - param.verbosity = parseOptions(options, "v\0"); - seed = parseOptions(options, "s\0"); + denormals_are_zeros = ParseOptions(options, "z\0"); + param.max_fused_size = ParseOptions(options, "f\0"); + param.verbosity = ParseOptions(options, "v\0"); + seed = ParseOptions(options, "s\0"); param.collect_mea_stat 
= true; - } catch (const std::invalid_argument &exp) { - IO::errorf("%s", exp.what()); - return {}; - } - Factory factory(num_sim_threads, num_state_threads, num_dblocks); - Simulator simulator = factory.CreateSimulator(); - StateSpace state_space = factory.CreateStateSpace(); - - std::vector> results; - - auto measure = [&results, &ncircuit]( - unsigned k, const State& state, NoisyRunner::Stat& stat) { - // Converts stat (which matches the MeasurementResult 'bits' field) into - // bitstrings matching the MeasurementResult 'bitstring' field. - unsigned idx = 0; - for (const auto& channel : ncircuit.channels) { - if (channel[0].kind != gate::kMeasurement) - continue; - for (const auto& op : channel[0].ops) { - std::vector bitstring; - uint64_t val = stat.samples[idx]; - for (const auto& q : op.qubits) { - bitstring.push_back((val >> q) & 1); - } - results.push_back(bitstring); + Factory factory(options); + Simulator simulator = factory.CreateSimulator(); + StateSpace state_space = factory.CreateStateSpace(); - idx += 1; - if (idx >= stat.samples.size()) - return; + std::vector> results; + + auto measure = [&results, &ncircuit]( + unsigned k, const State& state, NoisyRunner::Stat& stat) { + // Converts stat (which matches the MeasurementResult 'bits' field) into + // bitstrings matching the MeasurementResult 'bitstring' field. 
+ unsigned idx = 0; + for (const auto& channel : ncircuit.channels) { + if (channel[0].kind != gate::kMeasurement) + continue; + for (const auto& op : channel[0].ops) { + std::vector bitstring; + uint64_t val = stat.samples[idx]; + for (const auto& q : op.qubits) { + bitstring.push_back((val >> q) & 1); + } + results.push_back(bitstring); + + idx += 1; + if (idx >= stat.samples.size()) + return; + } } + }; + + if (denormals_are_zeros) { + SetFlushToZeroAndDenormalsAreZeros(); + } else { + ClearFlushToZeroAndDenormalsAreZeros(); } - }; - if (denormals_are_zeros) { - SetFlushToZeroAndDenormalsAreZeros(); - } else { - ClearFlushToZeroAndDenormalsAreZeros(); - } + if (!NoisyRunner::RunBatch(param, ncircuit, seed, seed + 1, + state_space, simulator, measure)) { + IO::errorf("qtrajectory sampling of the circuit errored out.\n"); + return {}; + } - if (!NoisyRunner::RunBatch(param, ncircuit, seed, seed + 1, - state_space, simulator, measure)) { - IO::errorf("qtrajectory sampling of the circuit errored out.\n"); + for (const auto& bitstring : results) { + result_bits.insert(result_bits.end(), bitstring.begin(), bitstring.end()); + } + } catch (const std::invalid_argument &exp) { + IO::errorf("%s", exp.what()); return {}; } - std::vector result_bits; - for (const auto& bitstring : results) { - result_bits.insert(result_bits.end(), bitstring.begin(), bitstring.end()); - } return result_bits; } @@ -1155,36 +1085,37 @@ std::vector> qsimh_simulate(const py::dict &options) { try { circuit = getCircuit(options); bitstrings = getBitstrings(options, circuit.num_qubits); - dense_parts = parseOptions(options, "k\0"); - param.prefix = parseOptions(options, "w\0"); - param.num_prefix_gatexs = parseOptions(options, "p\0"); - param.num_root_gatexs = parseOptions(options, "r\0"); - param.num_threads = parseOptions(options, "t\0"); - param.max_fused_size = parseOptions(options, "f\0"); - param.verbosity = parseOptions(options, "v\0"); - } catch (const std::invalid_argument &exp) { - 
IO::errorf("%s", exp.what()); - return {}; - } - - std::vector parts(circuit.num_qubits, 0); - for (auto i : dense_parts) { - unsigned idx = i.cast(); - if (idx >= circuit.num_qubits) { - IO::errorf("Invalid arguments are provided for arg k.\n"); - return {}; + dense_parts = ParseOptions(options, "k\0"); + param.prefix = ParseOptions(options, "w\0"); + param.num_prefix_gatexs = ParseOptions(options, "p\0"); + param.num_root_gatexs = ParseOptions(options, "r\0"); + param.num_threads = ParseOptions(options, "t\0"); + param.max_fused_size = ParseOptions(options, "f\0"); + param.verbosity = ParseOptions(options, "v\0"); + + std::vector parts(circuit.num_qubits, 0); + for (auto i : dense_parts) { + unsigned idx = i.cast(); + if (idx >= circuit.num_qubits) { + IO::errorf("Invalid arguments are provided for arg k.\n"); + return {}; + } + parts[i.cast()] = 1; } - parts[i.cast()] = 1; - } - // Define container for amplitudes - std::vector> amplitudes(bitstrings.size(), 0); + // Define container for amplitudes + std::vector> amplitudes(bitstrings.size(), 0); - Factory factory(param.num_threads, 0, 0); + Factory factory(options); - if (Runner::Run(param, factory, circuit, parts, bitstrings, amplitudes)) { - return amplitudes; + if (Runner::Run(param, factory, circuit, parts, bitstrings, amplitudes)) { + return amplitudes; + } + } catch (const std::invalid_argument &exp) { + IO::errorf("%s", exp.what()); + return {}; } + IO::errorf("qsimh simulation of the circuit errored out.\n"); return {}; } diff --git a/pybind_interface/pybind_main.h b/pybind_interface/pybind_main.h index 1e4395473..325b9a91e 100644 --- a/pybind_interface/pybind_main.h +++ b/pybind_interface/pybind_main.h @@ -23,6 +23,7 @@ namespace py = pybind11; #include +#include #include #include "../lib/circuit.h" @@ -180,6 +181,16 @@ qtrajectory_simulate_moment_expectation_values( // Hybrid simulator. 
std::vector> qsimh_simulate(const py::dict &options); +template +T ParseOptions(const py::dict& options, const char* key) { + if (!options.contains(key)) { + std::string msg = std::string("Argument ") + key + " is not provided.\n"; + throw std::invalid_argument(msg); + } + const auto& value = options[key]; + return value.cast(); +} + #define MODULE_BINDINGS \ m.doc() = "pybind11 plugin"; /* optional module docstring */ \ /* Methods for returning amplitudes */ \ diff --git a/pybind_interface/sse/pybind_main_sse.cpp b/pybind_interface/sse/pybind_main_sse.cpp index f04925135..21287d31f 100644 --- a/pybind_interface/sse/pybind_main_sse.cpp +++ b/pybind_interface/sse/pybind_main_sse.cpp @@ -27,12 +27,9 @@ namespace qsim { using Simulator = SimulatorSSE; struct Factory { - // num_state_threads and num_dblocks are unused, but kept for consistency - // with the GPU Factory. - Factory( - unsigned num_sim_threads, - unsigned num_state_threads, - unsigned num_dblocks) : num_threads(num_sim_threads) {} + explicit Factory(const py::dict& options) { + num_threads = ParseOptions(options, "t\0"); + } using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; diff --git a/qsimcirq/qsim_simulator.py b/qsimcirq/qsim_simulator.py index 240715639..417f67072 100644 --- a/qsimcirq/qsim_simulator.py +++ b/qsimcirq/qsim_simulator.py @@ -63,11 +63,19 @@ class QSimOptions: gpu_mode: use CUDA if set to 0 (default value), use the NVIDIA cuStateVec library if set to 1 or use the NVIDIA cuStateVecEx library if set to any other value. The "gpu_*" arguments below are - only considered if this is set to 0. + only considered if this is set to 0. The "gpu_cusvex_*" arguments + below are only considered if this is set to 2 or greater. gpu_state_threads: number of threads per CUDA block to use for the GPU StateSpace. This must be a power of 2 in the range [32, 1024]. gpu_data_blocks: number of data blocks to use for the GPU StateSpace. 
Below 16 data blocks, performance is noticeably reduced. + gpu_cusvex_log_buf_size: log2 of the transfer buffer size that is used + for MPI communication. Default value is 30, i.e. the buffer size is + 2^30 bytes. + gpu_cusvex_network_type: Device network type for multi-device: + 0=Switch (default), 1=FullMesh. Or layered network type for + multi-process: 0=SuperPOD (default), 1=GB200NVL, 2=SwitchTree, + 3=Communicator. verbosity: Logging verbosity. denormals_are_zeros: if true, set flush-to-zero and denormals-are-zeros MXCSR control flags. This prevents rare cases of performance @@ -81,6 +89,8 @@ class QSimOptions: gpu_mode: int = 0 gpu_state_threads: int = 512 gpu_data_blocks: int = 16 + gpu_cusvex_log_buf_size: int = 30 + gpu_cusvex_network_type: int = 0 verbosity: int = 0 denormals_are_zeros: bool = False @@ -97,6 +107,8 @@ def as_dict(self): "gmode": self.gpu_mode, "gsst": self.gpu_state_threads, "gdb": self.gpu_data_blocks, + "glbuf": self.gpu_cusvex_log_buf_size, + "gnwt": self.gpu_cusvex_network_type, "v": self.verbosity, "z": self.denormals_are_zeros, } diff --git a/tests/hybrid_custatevecex_test.cu b/tests/hybrid_custatevecex_test.cu index a0c75b031..96b1e27a2 100644 --- a/tests/hybrid_custatevecex_test.cu +++ b/tests/hybrid_custatevecex_test.cu @@ -53,7 +53,7 @@ TEST(HybridCuStateVecExTest, Hybrid4) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(); + qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/qtrajectory_custatevecex_test.cu b/tests/qtrajectory_custatevecex_test.cu index 8d70bfc00..d97c5b6d1 100644 --- a/tests/qtrajectory_custatevecex_test.cu +++ b/tests/qtrajectory_custatevecex_test.cu @@ -82,7 +82,7 @@ TEST(QTrajectoryCuStateVecExTest, UncomputeFinalState) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(); + qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return 
RUN_ALL_TESTS(); } diff --git a/tests/run_custatevecex_test.cu b/tests/run_custatevecex_test.cu index 079fd2696..e2ada42f0 100644 --- a/tests/run_custatevecex_test.cu +++ b/tests/run_custatevecex_test.cu @@ -256,7 +256,7 @@ TEST(RunQSimTest, CirqGates) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(); + qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/simulator_custatevecex_test.cu b/tests/simulator_custatevecex_test.cu index dcf9eaf65..cfb3df377 100644 --- a/tests/simulator_custatevecex_test.cu +++ b/tests/simulator_custatevecex_test.cu @@ -99,7 +99,7 @@ TYPED_TEST(SimulatorCuStateVecExTest, ExpectationValue2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(); + qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/statespace_custatevecex_test.cu b/tests/statespace_custatevecex_test.cu index db840d7c9..36d2f7df3 100644 --- a/tests/statespace_custatevecex_test.cu +++ b/tests/statespace_custatevecex_test.cu @@ -113,7 +113,7 @@ TYPED_TEST(StateSpaceCuStateVecExTest, BulkSetAmplDefault) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(); + qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } From ca89f998571c9181c509c05badbfac88622e6dc1 Mon Sep 17 00:00:00 2001 From: Sergei Isakov <54642992+sergeisakov@users.noreply.github.com> Date: Sun, 1 Feb 2026 15:20:25 +0100 Subject: [PATCH 2/3] Restrict the number of devices to pass tests. 
--- tests/simulator_custatevecex_test.cu | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/simulator_custatevecex_test.cu b/tests/simulator_custatevecex_test.cu index cfb3df377..fe39f8cc0 100644 --- a/tests/simulator_custatevecex_test.cu +++ b/tests/simulator_custatevecex_test.cu @@ -28,8 +28,7 @@ namespace qsim { template class SimulatorCuStateVecExTest : public testing::Test {}; -//using fp_impl = ::testing::Types; -using fp_impl = ::testing::Types; +using fp_impl = ::testing::Types; TYPED_TEST_SUITE(SimulatorCuStateVecExTest, fp_impl); @@ -41,7 +40,9 @@ struct Factory { using StateSpace = typename Simulator::StateSpace; StateSpace CreateStateSpace() const { - return StateSpace{mp}; + typename StateSpace::Parameter param; + param.num_devices = 2; + return StateSpace{mp, param}; } Simulator CreateSimulator() const { From 4a9be974c8b21afc38a8632b701d3582ca3d7586 Mon Sep 17 00:00:00 2001 From: Sergei Isakov <54642992+sergeisakov@users.noreply.github.com> Date: Sun, 1 Feb 2026 17:09:55 +0100 Subject: [PATCH 3/3] Make names consistent. 
--- apps/qsim_base_custatevecex.cu | 4 ++-- lib/multiprocess_custatevecex.h | 18 +++++++++--------- lib/statespace_custatevecex.h | 8 ++++---- lib/vectorspace_custatevecex.h | 18 +++++++++--------- .../custatevecex/pybind_main_custatevecex.cpp | 10 +++++----- tests/hybrid_custatevecex_test.cu | 2 +- tests/qtrajectory_custatevecex_test.cu | 2 +- tests/run_custatevecex_test.cu | 2 +- tests/simulator_custatevecex_test.cu | 2 +- tests/statespace_custatevecex_test.cu | 2 +- 10 files changed, 34 insertions(+), 34 deletions(-) diff --git a/apps/qsim_base_custatevecex.cu b/apps/qsim_base_custatevecex.cu index d7d1180a3..041cfba37 100644 --- a/apps/qsim_base_custatevecex.cu +++ b/apps/qsim_base_custatevecex.cu @@ -119,9 +119,9 @@ int main(int argc, char* argv[]) { explicit Factory(uint64_t transfer_buffer_size, unsigned verbosity = 0) : verbosity(verbosity) { MultiProcessCuStateVecEx::Parameter param = {transfer_buffer_size}; - mp.initialize(param); + mp.Initialize(param); - if (verbosity > 2 && mp.initialized()) { + if (verbosity > 2 && mp.Initialized()) { qsim::IO::messagef("# transfer_buf_size=%lu\n", transfer_buffer_size); } } diff --git a/lib/multiprocess_custatevecex.h b/lib/multiprocess_custatevecex.h index 1ffcd0332..4636b461b 100644 --- a/lib/multiprocess_custatevecex.h +++ b/lib/multiprocess_custatevecex.h @@ -54,27 +54,27 @@ struct MultiProcessCuStateVecEx { custatevecExCommunicatorFinalize(&status); } - custatevecExCommunicatorDescriptor_t communicator() const { + custatevecExCommunicatorDescriptor_t Communicator() const { return communicator_; } - unsigned num_processes() const { + unsigned NumProcesses() const { return num_processes_; } - unsigned rank() const { + unsigned Rank() const { return rank_; } - static bool valid_network_type(unsigned network_type) { + static bool ValidNetworkType(unsigned network_type) { return network_type < 4; } - bool initialized() const { + bool Initialized() const { return initialized_; } - void initialize(Parameter param) { + 
void Initialize(Parameter param) { param_ = param; int argc = 0; @@ -114,7 +114,7 @@ struct MultiProcessCuStateVecEx { num_global_qubits_ = get_num_global_qubits(num_processes); unsigned num_acc_global_qubits = 0; - auto network_layers = get_network_layers(param_.network_type); + auto network_layers = GetNetworkLayers(param_.network_type); num_global_qubits_per_layer_.reserve(2); global_index_bit_classes_.reserve(2); @@ -154,7 +154,7 @@ struct MultiProcessCuStateVecEx { initialized_ = true; } - auto create_sv_config(unsigned num_qubits, cudaDataType_t data_type) const { + auto CreateSVConfig(unsigned num_qubits, cudaDataType_t data_type) const { custatevecExDictionaryDescriptor_t sv_config = nullptr; if (!initialized_ || @@ -192,7 +192,7 @@ struct MultiProcessCuStateVecEx { using NetworkLayers = std::vector; - static NetworkLayers get_network_layers(NetworkType id) { + static NetworkLayers GetNetworkLayers(NetworkType id) { switch (id) { case kSuperPod: return {{CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P, 3}, diff --git a/lib/statespace_custatevecex.h b/lib/statespace_custatevecex.h index ce5cb0c3e..8aa6167f2 100644 --- a/lib/statespace_custatevecex.h +++ b/lib/statespace_custatevecex.h @@ -130,7 +130,7 @@ class StateSpaceCuStateVecEx : unsigned required_rank = k / size; if (state.distr_type() != Base::kMultiProcess - || Base::mp.rank() == required_rank) { + || Base::mp.Rank() == required_rank) { ErrorCheck(custatevecExStateVectorGetState( state.get(), buf, kStateDataType, k, k + 1, 1)); } @@ -139,7 +139,7 @@ class StateSpaceCuStateVecEx : if (state.distr_type() == Base::kMultiProcess) { auto cuda_type = GetCudaType>(); - auto comm = Base::mp.communicator(); + auto comm = Base::mp.Communicator(); ErrorCheck(comm->intf->bcast(comm, buf, 1, cuda_type, required_rank)); } @@ -161,7 +161,7 @@ class StateSpaceCuStateVecEx : unsigned required_rank = k / size; if (state.distr_type() != Base::kMultiProcess - || Base::mp.rank() == required_rank) { + || 
Base::mp.Rank() == required_rank) { ErrorCheck(custatevecExStateVectorSetState( state.get(), buf, kStateDataType, k, k + 1, 1)); } @@ -183,7 +183,7 @@ class StateSpaceCuStateVecEx : unsigned required_rank = k / size; if (state.distr_type() != Base::kMultiProcess - || Base::mp.rank() == required_rank) { + || Base::mp.Rank() == required_rank) { ErrorCheck(custatevecExStateVectorSetState( state.get(), buf, kStateDataType, k, k + 1, 1)); } diff --git a/lib/vectorspace_custatevecex.h b/lib/vectorspace_custatevecex.h index c7b17455a..2d80f28de 100644 --- a/lib/vectorspace_custatevecex.h +++ b/lib/vectorspace_custatevecex.h @@ -245,7 +245,7 @@ class VectorSpaceCuStateVecEx { ResultType local_r = callback(k, res); auto cuda_type = GetCudaType(); - auto comm = mp_->communicator(); + auto comm = mp_->Communicator(); ErrorCheck(comm->intf->allreduce(comm, &local_r, &r, 1, cuda_type)); return r; @@ -338,7 +338,7 @@ class VectorSpaceCuStateVecEx { ResultType local_r = callback(k, res1, res2); auto cuda_type = GetCudaType(); - auto comm = mp_->communicator(); + auto comm = mp_->Communicator(); ErrorCheck(comm->intf->allreduce(comm, &local_r, &r, 1, cuda_type)); return r; @@ -385,16 +385,16 @@ class VectorSpaceCuStateVecEx { Vector Create(unsigned num_qubits) const { custatevecExStateVectorDescriptor_t state_vec; custatevecExDictionaryDescriptor_t sv_config - = mp.create_sv_config(num_qubits, kStateDataType); + = mp.CreateSVConfig(num_qubits, kStateDataType); unsigned num_substates = 1; DistributionType distr_type = kNoDistr; if (sv_config != nullptr) { ErrorCheck(custatevecExStateVectorCreateMultiProcess( - &state_vec, sv_config, nullptr, mp.communicator(), nullptr)); + &state_vec, sv_config, nullptr, mp.Communicator(), nullptr)); - num_substates = mp.num_processes(); + num_substates = mp.NumProcesses(); distr_type = kMultiProcess; if (param.verbosity > 2) { @@ -507,7 +507,7 @@ class VectorSpaceCuStateVecEx { bool Copy(const Vector& src, fp_type* dest) const { if 
(src.distr_type() == kMultiProcess) { uint64_t size = (uint64_t{1} << src.num_qubits()) / src.num_substates(); - uint64_t offset = size * mp.rank(); + uint64_t offset = size * mp.Rank(); ErrorCheck(custatevecExStateVectorGetState( src.get(), dest + 2 * offset, kStateDataType, @@ -515,7 +515,7 @@ class VectorSpaceCuStateVecEx { ErrorCheck(custatevecExStateVectorSynchronize(src.get())); auto cuda_type = GetCudaType>(); - auto comm = mp.communicator(); + auto comm = mp.Communicator(); ErrorCheck(comm->intf->allgather( comm, dest + 2 * offset, dest, size, cuda_type)); } else { @@ -533,7 +533,7 @@ class VectorSpaceCuStateVecEx { bool Copy(const fp_type* src, Vector& dest) const { if (dest.distr_type() == kMultiProcess) { uint64_t size = (uint64_t{1} << dest.num_qubits()) / dest.num_substates(); - uint64_t offset = size * mp.rank(); + uint64_t offset = size * mp.Rank(); ErrorCheck(custatevecExStateVectorSetState( dest.get(), src + 2 * offset, kStateDataType, @@ -564,7 +564,7 @@ class VectorSpaceCuStateVecEx { if (dest.distr_type() == kMultiProcess) { size /= dest.num_substates(); - uint64_t offset = size * mp.rank(); + uint64_t offset = size * mp.Rank(); ErrorCheck(custatevecExStateVectorSetState( dest.get(), src + 2 * offset, kStateDataType, diff --git a/pybind_interface/custatevecex/pybind_main_custatevecex.cpp b/pybind_interface/custatevecex/pybind_main_custatevecex.cpp index e8b9e7f6d..400baa220 100644 --- a/pybind_interface/custatevecex/pybind_main_custatevecex.cpp +++ b/pybind_interface/custatevecex/pybind_main_custatevecex.cpp @@ -42,10 +42,10 @@ namespace qsim { verbosity = ParseOptions(options, "v\0"); nwt = ParseOptions(options, "gnwt\0"); - if (!mp.initialized()) { + if (!mp.Initialized()) { using MP = qsim::MultiProcessCuStateVecEx; - if (!mp.valid_network_type(nwt)) { + if (!mp.ValidNetworkType(nwt)) { throw std::invalid_argument("Invalid network type."); } @@ -58,14 +58,14 @@ namespace qsim { param.transfer_buffer_size = buffer_size; param.network_type = 
network_type; - mp.initialize(param); + mp.Initialize(param); - if (verbosity > 2 && mp.initialized()) { + if (verbosity > 2 && mp.Initialized()) { qsim::IO::messagef("transfer_buf_size=%lu\n", buffer_size); } } - if (!mp.initialized()) { + if (!mp.Initialized()) { if (!StateSpace::ValidDeviceNetworkType(nwt)) { throw std::invalid_argument("Invalid device network type."); } diff --git a/tests/hybrid_custatevecex_test.cu b/tests/hybrid_custatevecex_test.cu index 96b1e27a2..142ad747b 100644 --- a/tests/hybrid_custatevecex_test.cu +++ b/tests/hybrid_custatevecex_test.cu @@ -53,7 +53,7 @@ TEST(HybridCuStateVecExTest, Hybrid4) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); + qsim::mp.Initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/qtrajectory_custatevecex_test.cu b/tests/qtrajectory_custatevecex_test.cu index d97c5b6d1..f46c1562f 100644 --- a/tests/qtrajectory_custatevecex_test.cu +++ b/tests/qtrajectory_custatevecex_test.cu @@ -82,7 +82,7 @@ TEST(QTrajectoryCuStateVecExTest, UncomputeFinalState) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); + qsim::mp.Initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/run_custatevecex_test.cu b/tests/run_custatevecex_test.cu index e2ada42f0..823df7dbd 100644 --- a/tests/run_custatevecex_test.cu +++ b/tests/run_custatevecex_test.cu @@ -256,7 +256,7 @@ TEST(RunQSimTest, CirqGates) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); + qsim::mp.Initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/simulator_custatevecex_test.cu b/tests/simulator_custatevecex_test.cu index fe39f8cc0..5d8d1ea68 100644 --- 
a/tests/simulator_custatevecex_test.cu +++ b/tests/simulator_custatevecex_test.cu @@ -100,7 +100,7 @@ TYPED_TEST(SimulatorCuStateVecExTest, ExpectationValue2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); + qsim::mp.Initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); } diff --git a/tests/statespace_custatevecex_test.cu b/tests/statespace_custatevecex_test.cu index 36d2f7df3..55993459a 100644 --- a/tests/statespace_custatevecex_test.cu +++ b/tests/statespace_custatevecex_test.cu @@ -113,7 +113,7 @@ TYPED_TEST(StateSpaceCuStateVecExTest, BulkSetAmplDefault) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - qsim::mp.initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); + qsim::mp.Initialize(qsim::MultiProcessCuStateVecEx::Parameter{}); return RUN_ALL_TESTS(); }