From 2f985cfb14a821ea76e4ddcf801e8106292d66b3 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 25 Nov 2025 00:05:46 +0000 Subject: [PATCH 1/6] Refactor LLVM initialization to create fewer TargetMachines. --- cpp/src/gandiva/engine.cc | 96 +++++++++++++++++++++------------------ cpp/src/gandiva/engine.h | 13 +++++- 2 files changed, 64 insertions(+), 45 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index a55421b1b486..b820c79391f3 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -217,38 +217,6 @@ Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) { } #endif -Result> BuildJIT( - llvm::orc::JITTargetMachineBuilder jtmb, - std::optional>& object_cache) { - llvm::orc::LLJITBuilder jit_builder; - -#ifdef JIT_LINK_SUPPORTED - ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); -#endif - - jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); - if (object_cache.has_value()) { - jit_builder.setCompileFunctionCreator( - [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB) - -> llvm::Expected> { - auto target_machine = JTMB.createTargetMachine(); - if (!target_machine) { - return target_machine.takeError(); - } - // after compilation, the object code will be stored into the given object - // cache - return std::make_unique( - std::move(*target_machine), &object_cache.value().get()); - }); - } - auto maybe_jit = jit_builder.create(); - ARROW_ASSIGN_OR_RAISE(auto jit, - AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); - - AddProcessSymbol(*jit); - return jit; -} - arrow::Status VerifyAndLinkModule( llvm::Module& dest_module, llvm::Expected> src_module_or_error) { @@ -316,20 +284,67 @@ void Engine::InitOnce() { } Engine::Engine(const std::shared_ptr& conf, - std::unique_ptr lljit, - std::unique_ptr target_machine, bool cached) + std::optional> object_cache, + bool cached) : context_(std::make_unique()), - lljit_(std::move(lljit)), + //lljit_(std::move(lljit)), ir_builder_(std::make_unique>(*context_)), types_(*context_), optimize_(conf->optimize()), cached_(cached), function_registry_(conf->function_registry()), - target_machine_(std::move(target_machine)), conf_(conf) { + + auto jtmb = std::move(MakeTargetMachineBuilder(*conf)).ValueUnsafe(); + auto maybe_tm = jtmb.createTargetMachine(); + auto target_machine = + AsArrowResult(maybe_tm, "Could not create target machine: "); + target_machine_ = std::move(target_machine).ValueUnsafe(); + llvm::TargetMachine& target_machine_ptr = *target_machine_; + auto data_layout = target_machine_->createDataLayout(); + //BuildJIT(jtmb, object_cache, data_layout, *target_machine_); + + + + llvm::orc::LLJITBuilder jit_builder; + +//#ifdef JIT_LINK_SUPPORTED +// ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); +//#endif + + jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); + jit_builder.setDataLayout(std::make_optional(data_layout)); + + if (object_cache.has_value()) { + jit_builder.setCompileFunctionCreator( + [&target_machine_ptr, &object_cache](llvm::orc::JITTargetMachineBuilder JTMB) + -> llvm::Expected> { + + // after compilation, the object code will be stored into the given object + // cache + return std::make_unique( + target_machine_ptr, &object_cache.value().get()); + }); + } + auto maybe_jit = jit_builder.create(); + auto jit = AsArrowResult(maybe_jit, "Could not create LLJIT instance: ").ValueUnsafe(); + + AddProcessSymbol(*jit); + lljit_ = std::move(jit); + + + + + + + + + + // LLVM 10 doesn't like the expr function name to be the same as the module name auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); module_ = std::make_unique(module_id, *context_); + module_->setDataLayout(data_layout); } Engine::~Engine() {} @@ -366,14 +381,9 @@ Result> Engine::Make( std::optional> object_cache) { std::call_once(llvm_init_once_flag, InitOnce); - ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf)); - ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache)); - auto maybe_tm = jtmb.createTargetMachine(); - ARROW_ASSIGN_OR_RAISE(auto target_machine, - AsArrowResult(maybe_tm, "Could not create target machine: ")); - + std::unique_ptr engine{ - new Engine(conf, std::move(jit), std::move(target_machine), cached)}; + new Engine(conf, object_cache, cached)}; ARROW_RETURN_NOT_OK(engine->Init()); return engine; diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 3c8914a7b4d9..8231b1550751 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -37,6 +37,7 @@ namespace llvm::orc { class LLJIT; +class JITTargetMachineBuilder; } // namespace llvm::orc namespace gandiva { @@ -95,8 +96,8 @@ class GANDIVA_EXPORT Engine { private: Engine(const std::shared_ptr& conf, - std::unique_ptr lljit, - std::unique_ptr target_machine, bool cached); + std::optional> object_cache, + bool cached); // Post construction init. This _must_ be called after the constructor. Status Init(); @@ -116,6 +117,14 @@ class GANDIVA_EXPORT Engine { // Remove unused functions to reduce compile time. Status RemoveUnusedFunctions(); + /* + Result> BuildJIT( + llvm::orc::JITTargetMachineBuilder jtmb, + std::optional>& object_cache, + llvm::DataLayout& data_layout, + llvm::TargetMachine* target_machine); +*/ + std::unique_ptr context_; std::unique_ptr lljit_; std::unique_ptr> ir_builder_; From 76128bca43dd5817ae8b52b52157c963fb0e464e Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Thu, 29 Jan 2026 00:22:10 +0000 Subject: [PATCH 2/6] refactor --- cpp/src/gandiva/engine.cc | 94 ++++++++++++++++++--------------------- cpp/src/gandiva/engine.h | 15 ++----- 2 files changed, 47 insertions(+), 62 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 777fd6a857b6..0680c8292684 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -284,63 +284,18 @@ void Engine::InitOnce() { } Engine::Engine(const std::shared_ptr& conf, - std::optional> object_cache, + std::unique_ptr lljit, + std::shared_ptr target_machine, bool cached) : context_(std::make_unique()), - //lljit_(std::move(lljit)), + lljit_(std::move(lljit)), ir_builder_(std::make_unique>(*context_)), types_(*context_), optimize_(conf->optimize()), cached_(cached), function_registry_(conf->function_registry()), + target_machine_(std::move(target_machine)), conf_(conf) { - - auto jtmb = std::move(MakeTargetMachineBuilder(*conf)).ValueUnsafe(); - auto maybe_tm = jtmb.createTargetMachine(); - auto target_machine = - AsArrowResult(maybe_tm, "Could not create target machine: "); - target_machine_ = std::move(target_machine).ValueUnsafe(); - llvm::TargetMachine& target_machine_ptr = *target_machine_; - auto data_layout = target_machine_->createDataLayout(); - //BuildJIT(jtmb, object_cache, data_layout, *target_machine_); - - - - llvm::orc::LLJITBuilder jit_builder; - -//#ifdef JIT_LINK_SUPPORTED -// ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); -//#endif - - jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); - jit_builder.setDataLayout(std::make_optional(data_layout)); - - if (object_cache.has_value()) { - jit_builder.setCompileFunctionCreator( - [&target_machine_ptr, &object_cache](llvm::orc::JITTargetMachineBuilder JTMB) - -> llvm::Expected> { - - // after compilation, the object code will be stored into the given object - // cache - return std::make_unique( - target_machine_ptr, &object_cache.value().get()); - }); - } - auto maybe_jit = jit_builder.create(); - auto jit = AsArrowResult(maybe_jit, "Could not create LLJIT instance: ").ValueUnsafe(); - - AddProcessSymbol(*jit); - lljit_ = std::move(jit); - - - - - - - - - - // LLVM 10 doesn't like the expr function name to be the same as the module name auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); module_ = std::make_unique(module_id, *context_); @@ -381,9 +336,46 @@ Result> Engine::Make( std::optional> object_cache) { std::call_once(llvm_init_once_flag, InitOnce); - + // Create the target machine + ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf)); + auto maybe_tm = jtmb.createTargetMachine(); + ARROW_ASSIGN_OR_RAISE(auto target_machine, + AsArrowResult(maybe_tm, "Could not create target machine: ")); + + auto shared_target_machine = + std::shared_ptr(std::move(target_machine)); + auto data_layout = shared_target_machine->createDataLayout(); + + // Build the LLJIT instance + llvm::orc::LLJITBuilder jit_builder; + +#ifdef JIT_LINK_SUPPORTED + ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); +#endif + + jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); + jit_builder.setDataLayout(std::make_optional(data_layout)); + + if (object_cache.has_value()) { + jit_builder.setCompileFunctionCreator( + [tm = shared_target_machine, + &object_cache](llvm::orc::JITTargetMachineBuilder JTMB) + -> llvm::Expected> { + // after compilation, the object code will be stored into the given object + // cache + return std::make_unique( + *tm, &object_cache.value().get()); + }); + } + + auto maybe_jit = jit_builder.create(); + ARROW_ASSIGN_OR_RAISE(auto jit, + AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); + + AddProcessSymbol(*jit); + std::unique_ptr engine{ - new Engine(conf, object_cache, cached)}; + new Engine(conf, std::move(jit), std::move(shared_target_machine), cached)}; ARROW_RETURN_NOT_OK(engine->Init()); return engine; diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 8231b1550751..af086d070b4d 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -96,8 +96,9 @@ class GANDIVA_EXPORT Engine { private: Engine(const std::shared_ptr& conf, - std::optional> object_cache, - bool cached); + std::unique_ptr lljit, + std::shared_ptr target_machine, + bool cached); // Post construction init. This _must_ be called after the constructor. Status Init(); @@ -117,14 +118,6 @@ class GANDIVA_EXPORT Engine { // Remove unused functions to reduce compile time. Status RemoveUnusedFunctions(); - /* - Result> BuildJIT( - llvm::orc::JITTargetMachineBuilder jtmb, - std::optional>& object_cache, - llvm::DataLayout& data_layout, - llvm::TargetMachine* target_machine); -*/ - std::unique_ptr context_; std::unique_ptr lljit_; std::unique_ptr> ir_builder_; @@ -139,7 +132,7 @@ class GANDIVA_EXPORT Engine { bool functions_loaded_ = false; std::shared_ptr function_registry_; std::string module_ir_; - std::unique_ptr target_machine_; + std::shared_ptr target_machine_; const std::shared_ptr conf_; }; From fd8ef3cf668d7f72d8cb59c9433534dec85c2a9c Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Thu, 29 Jan 2026 00:38:58 +0000 Subject: [PATCH 3/6] cleanup --- cpp/src/gandiva/engine.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index af086d070b4d..59608c2661ad 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -37,7 +37,6 @@ namespace llvm::orc { class LLJIT; -class JITTargetMachineBuilder; } // namespace llvm::orc namespace gandiva { From 6c4c692bc88a3ae4b00938d0f74a977c0337a339 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 28 Jan 2026 16:41:02 -0800 Subject: [PATCH 4/6] lint --- cpp/src/gandiva/engine.cc | 7 +++---- cpp/src/gandiva/engine.h | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 0680c8292684..e243b7e95837 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -285,8 +285,7 @@ void Engine::InitOnce() { Engine::Engine(const std::shared_ptr& conf, std::unique_ptr lljit, - std::shared_ptr target_machine, - bool cached) + std::shared_ptr target_machine, bool cached) : context_(std::make_unique()), lljit_(std::move(lljit)), ir_builder_(std::make_unique>(*context_)), @@ -363,8 +362,8 @@ Result> Engine::Make( -> llvm::Expected> { // after compilation, the object code will be stored into the given object // cache - return std::make_unique( - *tm, &object_cache.value().get()); + return std::make_unique(*tm, + &object_cache.value().get()); }); } diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 59608c2661ad..43d999a7e207 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -96,8 +96,7 @@ class GANDIVA_EXPORT Engine { private: Engine(const std::shared_ptr& conf, std::unique_ptr lljit, - std::shared_ptr target_machine, - bool cached); + std::shared_ptr target_machine, bool cached); // Post construction init. This _must_ be called after the constructor. Status Init(); From a372665ac971760e7f58558c74c72e3fd4e76acd Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 28 Jan 2026 16:45:46 -0800 Subject: [PATCH 5/6] Added a comment --- cpp/src/gandiva/engine.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 43d999a7e207..20165787cb66 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -130,6 +130,8 @@ class GANDIVA_EXPORT Engine { bool functions_loaded_ = false; std::shared_ptr function_registry_; std::string module_ir_; + // The lifetime of the TargetMachine is shared with LLJIT. This prevents unnecessary + // duplication of this expensive object. std::shared_ptr target_machine_; const std::shared_ptr conf_; }; From d018c9b68795c22450aaf3df39641daadd1008d4 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Thu, 29 Jan 2026 01:37:17 +0000 Subject: [PATCH 6/6] Refactor --- cpp/src/gandiva/engine.cc | 62 +++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index e243b7e95837..496722b1ea84 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -217,6 +217,41 @@ Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) { } #endif +Result> BuildJIT( + llvm::orc::JITTargetMachineBuilder jtmb, + std::shared_ptr target_machine, + std::optional> object_cache) { + auto data_layout = target_machine->createDataLayout(); + + llvm::orc::LLJITBuilder jit_builder; + +#ifdef JIT_LINK_SUPPORTED + ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); +#endif + + jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); + jit_builder.setDataLayout(std::make_optional(data_layout)); + + if (object_cache.has_value()) { + jit_builder.setCompileFunctionCreator( + [tm = std::move(target_machine), + &object_cache](llvm::orc::JITTargetMachineBuilder JTMB) + -> llvm::Expected> { + // after compilation, the object code will be stored into the given object + // cache + return std::make_unique(*tm, + &object_cache.value().get()); + }); + } + + auto maybe_jit = jit_builder.create(); + ARROW_ASSIGN_OR_RAISE(auto jit, + AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); + + AddProcessSymbol(*jit); + return jit; +} + arrow::Status VerifyAndLinkModule( llvm::Module& dest_module, llvm::Expected> src_module_or_error) { @@ -343,35 +378,10 @@ Result> Engine::Make( auto shared_target_machine = std::shared_ptr(std::move(target_machine)); - auto data_layout = shared_target_machine->createDataLayout(); // Build the LLJIT instance - llvm::orc::LLJITBuilder jit_builder; - -#ifdef JIT_LINK_SUPPORTED - ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); -#endif - - jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); - jit_builder.setDataLayout(std::make_optional(data_layout)); - - if (object_cache.has_value()) { - jit_builder.setCompileFunctionCreator( - [tm = shared_target_machine, - &object_cache](llvm::orc::JITTargetMachineBuilder JTMB) - -> llvm::Expected> { - // after compilation, the object code will be stored into the given object - // cache - return std::make_unique(*tm, - &object_cache.value().get()); - }); - } - - auto maybe_jit = jit_builder.create(); ARROW_ASSIGN_OR_RAISE(auto jit, - AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); - - AddProcessSymbol(*jit); + BuildJIT(std::move(jtmb), shared_target_machine, object_cache)); std::unique_ptr engine{ new Engine(conf, std::move(jit), std::move(shared_target_machine), cached)};