pytorch · zhaoxul-qti · May 4, 2026
@@ -70,9 +70,9 @@ class UndefinedQuantizeParamsWrapper final : public QuantizeParamsWrapper {
   }
 
   Qnn_QuantizeParams_t CreateQuantizeParams() override {
-    Qnn_QuantizeParams_t rval = {
-        .encodingDefinition = GetEncodingDefinition(),
-        .quantizationEncoding = GetQuantizationEncoding()};
+    Qnn_QuantizeParams_t rval{}; // zero unassigned members, as before
+    rval.encodingDefinition = GetEncodingDefinition();
+    rval.quantizationEncoding = GetQuantizationEncoding();
     return rval;
   }
 };

@@ -130,9 +130,12 @@ class TensorWrapper {
   std::unique_ptr<char[]> owned_data_;
   bool created_{false};
 
-  Qnn_Tensor_t tensor_ = {
-      .version = QNN_TENSOR_VERSION_2,
-      .v2 = QNN_TENSOR_V2_INIT};
+  Qnn_Tensor_t tensor_ = []() noexcept {
+    Qnn_Tensor_t t{};
+    t.version = QNN_TENSOR_VERSION_2;
+    t.v2 = QNN_TENSOR_V2_INIT;
+    return t;
+  }();
 };
 // base function for Create TensorWrapper
 std::shared_ptr<TensorWrapper> CreateTensorWrapper(

@@ -26,6 +26,16 @@
 #define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity"
 #define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"
 
+#if defined(_MSC_VER) // dllexport when building the DLL, else dllimport
+#if defined(QNN_EXECUTORCH_BUILDING_DLL)
+#define QNN_EXECUTORCH_EXPORT __declspec(dllexport)
+#else
+#define QNN_EXECUTORCH_EXPORT __declspec(dllimport)
+#endif // QNN_EXECUTORCH_BUILDING_DLL
+#else // no _MSC_VER: mark the symbol with default visibility
+#define QNN_EXECUTORCH_EXPORT __attribute__((__visibility__("default")))
+#endif // _MSC_VER
+
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
@@ -68,18 +78,14 @@ struct CustomMemTensorInfo {
 /// alignment as MemoryAllocator::kDefaultAlignment.
 /// See runtime/core/memory_allocator.h. The function returns a valid pointer
 /// if allocation is successful.
-__attribute__((__visibility__("default"))) void* QnnExecuTorchAllocCustomMem(
-    size_t bytes,
-    size_t alignment);
+QNN_EXECUTORCH_EXPORT void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment);
 
 /// Add tensor to custom memory with custom type descriptor. Create memory
 /// handle to tensor wrapper during execution
-__attribute__((__visibility__("default"))) void
-QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem);
+QNN_EXECUTORCH_EXPORT void QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem);
 
 /// Free the allocated shared memory.
-__attribute__((__visibility__("default"))) void QnnExecuTorchFreeCustomMem(
-    void* buffer_ptr);
+QNN_EXECUTORCH_EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr);
 
 #ifdef __cplusplus
 }

@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
+#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>

@@ -199,8 +199,8 @@ int32_t main(int32_t argc, char** argv) {
     }
   }
   // generate
-  executorch::extension::llm::GenerationConfig config{
-      .temperature = temperature};
+  executorch::extension::llm::GenerationConfig config{};
+  config.temperature = temperature;
 
   config.ignore_eos = FLAGS_ignore_eos;
   config.num_bos = FLAGS_num_bos;

@@ -40,9 +40,8 @@ Error AttentionSinkRopeRunner::load(
   for (const std::string& method_name : method_names) {
     ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(method_name));
   }
-  eviction_batch_size_ = ET_UNWRAP(module_->get("get_eviction_batch_size"))
-                             .toScalar()
-                             .to<int64_t>();
+  ET_UNWRAP(eviction_batch_size_evalue__, module_->get("get_eviction_batch_size"));
+  eviction_batch_size_ = eviction_batch_size_evalue__.toScalar().to<int64_t>();
   return Error::Ok;
 }
 

@@ -354,8 +354,8 @@ Result<int64_t> LhdTokenGenerator<T>::generate(
       shifted_pos++;
 
       // print the token as string, decode it with the Tokenizer object
-      token_callback(
-          ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token)));
+      ET_UNWRAP_TOKENIZER(decoded_token__, this->tokenizer_->decode(prev_token, cur_token));
+      token_callback(decoded_token__);
 
       // data-dependent terminating condition: we have n_eos_ number of EOS
       if (this->eos_ids_->count(cur_token) > 0) {

@@ -340,8 +340,8 @@ Result<int64_t> MultimodalLhdTokenGenerator<T>::generate(
       pos++;
 
       // print the token as string, decode it with the Tokenizer object
-      token_callback(
-          ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token)));
+      ET_UNWRAP_TOKENIZER(decoded_token__, this->tokenizer_->decode(prev_token, cur_token));
+      token_callback(decoded_token__);
 
       // data-dependent terminating condition: we have n_eos_ number of EOS
       if (this->eos_ids_->count(cur_token) > 0) {

@@ -226,8 +226,8 @@ Error QNNMultimodalRunner<T>::load() {
 
   ET_LOG(Info, "Reading metadata from model");
   // retrieve any method meta, can be either prefill or kv
-  int64_t num_layers =
-      ET_UNWRAP(text_decoder_->get("get_n_layers")).toScalar().to<int64_t>();
+  ET_UNWRAP(num_layers_evalue__, text_decoder_->get("get_n_layers"));
+  int64_t num_layers = num_layers_evalue__.toScalar().to<int64_t>();
 
   ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers");
   // k_cache: [1, n_heads, head_dim, seq_len]
@@ -295,8 +295,9 @@ Error QNNMultimodalRunner<T>::load() {
   // attention
   int32_t sliding_window = context_len_;
   if (text_decoder_->method_names()->count("get_sliding_window") > 0) {
+    ET_UNWRAP(sliding_window_evalue__, text_decoder_->get("get_sliding_window"));
     sliding_window =
-        ET_UNWRAP(text_decoder_->get("get_sliding_window")).toInt();
+        sliding_window_evalue__.toInt();
   }
   kv_manager_ = std::make_unique<KVManager<T>>(typename KVManager<T>::Metadata{
       context_len_,
@@ -522,8 +523,8 @@ executorch::runtime::Error QNNMultimodalRunner<T>::generate(
   // print the first token from prefill. No prev_token so use cur_token for
   // it.
   if (token_callback) {
-    token_callback(
-        ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
+    ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(cur_token, cur_token));
+    token_callback(decoded_token__);
   }
   ET_LOG(
       Info,
@@ -533,7 +534,7 @@ executorch::runtime::Error QNNMultimodalRunner<T>::generate(
   // start the main loop
   prompt_tokens.push_back(cur_token);
 
-  int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate(
+  ET_UNWRAP(num_generated_tokens, token_generator_->generate(
       prompt_tokens, cur_pos_, seq_len, token_callback, dump_logits, nullptr));
   stats_.inference_end_ms = time_in_ms();
   ET_LOG(

@@ -230,8 +230,8 @@ Error Runner<T>::load() {
 
   ET_LOG(Info, "Reading metadata from model");
   // retrieve any method meta, can be either prefill or kv
-  int64_t num_layers =
-      ET_UNWRAP(module_->get("get_n_layers")).toScalar().to<int64_t>();
+  ET_UNWRAP(num_layers_evalue__, module_->get("get_n_layers"));
+  int64_t num_layers = num_layers_evalue__.toScalar().to<int64_t>();
 
   ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers");
   // k_cache: [1, n_heads, head_dim, seq_len]
@@ -273,7 +273,8 @@ Error Runner<T>::load() {
   // attention
   int32_t sliding_window = context_len_;
   if (module_->method_names()->count("get_sliding_window") > 0) {
-    sliding_window = ET_UNWRAP(module_->get("get_sliding_window")).toInt();
+    ET_UNWRAP(sliding_window_evalue__, module_->get("get_sliding_window"));
+    sliding_window = sliding_window_evalue__.toInt();
   }
   kv_manager_ = std::make_unique<KVManager<T>>(typename KVManager<T>::Metadata{
       context_len_,
@@ -457,8 +458,8 @@ Error Runner<T>::generate_from_prompt_or_file(
   // print the first token from prefill. No prev_token so use cur_token for
   // it.
   if (token_callback) {
-    token_callback(
-        ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
+    ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(cur_token, cur_token));
+    token_callback(decoded_token__);
   }
   ET_LOG(
       Info,
@@ -467,7 +468,7 @@ Error Runner<T>::generate_from_prompt_or_file(
 
   // start the main loop
   prompt_tokens.push_back(cur_token);
-  int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate(
+  ET_UNWRAP(num_generated_tokens, token_generator_->generate(
       prompt_tokens,
       cur_pos_,
       seq_len,

@@ -336,8 +336,8 @@ Result<int64_t> TokenGenerator<T>::generate(
     pos++;
 
     // print the token as string, decode it with the Tokenizer object
-    token_callback(
-        ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
+    ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(prev_token, cur_token));
+    token_callback(decoded_token__);
 
     // data-dependent terminating condition: we have n_eos_ number of EOS
     if (eos_ids_->count(cur_token) > 0) {

@@ -10,6 +10,7 @@
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
 #include <cstddef>
 #include <memory>
+#include <vector>
 
 // Template struct to hold tensor data and tensor
 template <typename T>

@@ -180,8 +180,8 @@ Error Runner::generate(
     output_token_ids.push_back(cur_token);
 
     if (token_callback) {
-      token_callback(
-          ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
+      ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(prev_token, cur_token));
+      token_callback(decoded_token__);
     }
     if (eos_ids_->count(cur_token) > 0) {
       ET_LOG(Info, "\nReached to the end of generation");

@@ -171,8 +171,8 @@ Error Runner::transcribe(
     ++pos;
 
     if (token_callback) {
-      token_callback(
-          ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
+      ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(prev_token, cur_token));
+      token_callback(decoded_token__);
     }
     if (eos_ids_->count(cur_token) > 0) {
       ET_LOG(Info, "\nReached to the end of generation");

@@ -401,7 +401,7 @@ void KVCachedMemory::prepare_io(
          i < thread_pool_.num_workers();
          ++i) {
       lr_update_kv_.push_back(
-          {.start = i * range, .end = (i + 1) * range, .step = 1});
+          {i * range, (i + 1) * range, 1});
     }
   }
 }

@@ -19,18 +19,19 @@
 #include <sys/resource.h>
 #endif
 
-#define ET_UNWRAP_TOKENIZER(result__)                       \
-  ({                                                        \
-    auto tk_result__ = (result__);                          \
-    if (!tk_result__.ok()) {                                \
-      ET_LOG(                                               \
-          Error,                                            \
-          "Tokenizers error code %d",                       \
-          static_cast<uint32_t>(tk_result__.error()));      \
-      return ::executorch::runtime::Error::InvalidArgument; \
-    }                                                       \
-    std::move(*tk_result__);                                \
-  })
+// Declares var__ with the unwrapped value; on failure logs the error code and
+// returns InvalidArgument. Multi-statement, no trailing ';': use it only as a
+// block-scope statement. The temporary's name embeds var__ to avoid clashes.
+#define ET_UNWRAP_TOKENIZER(var__, result__)                      \
+  auto et_unwrap_result_##var__ = (result__);                     \
+  if (!et_unwrap_result_##var__.ok()) {                           \
+    ET_LOG(                                                       \
+        Error,                                                    \
+        "Tokenizers error code %d",                               \
+        static_cast<uint32_t>(et_unwrap_result_##var__.error())); \
+    return ::executorch::runtime::Error::InvalidArgument;         \
+  }                                                               \
+  auto var__ = std::move(*et_unwrap_result_##var__)
 
 #define ET_CHECK_TK_OK_OR_RETURN_ERROR(result__, ...)                        \
   do {                                                                       \

@@ -916,7 +916,7 @@ struct promote_types {
 #define ET_INTERNAL_SWITCH(TYPE, CONTEXT, NAME, ...)            \
   [&] {                                                         \
     const auto& _st = TYPE;                                     \
-    constexpr const char* et_switch_name = NAME;                \
+    const char* et_switch_name = NAME;                          \
     (void)et_switch_name; /* Suppress unused var */             \
     C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") \
     switch (_st) {                                              \

@@ -215,54 +215,52 @@ using ::executorch::runtime::Result;
 } // namespace torch
 
 /**
- * Unwrap a Result to obtain its value. If the Result contains an error,
- * propogate the error via trivial function return.
+ * Unwrap a Result into a new variable var__ declared in the current scope.
+ * On error, propagate it via function return. Expands to several statements,
+ * so use it only as a full statement at block scope, never in an expression.
  *
  * Note: A function using ET_UNWRAP should itself return a Result or Error.
  *
+ * @param[in] var__ Name of the variable to declare; it is initialized by
+ *   moving the unwrapped value out of the Result.
  * @param[in] result__ Expression yielding the result to unwrap.
  * @param[in] ... Optional format string for the log error message and its
- * arguments.
+ *   arguments.
  */
-#define ET_UNWRAP(result__, ...) ET_INTERNAL_UNWRAP(result__, ##__VA_ARGS__)
+#define ET_UNWRAP(...) \
+  ET_INTERNAL_UNWRAP_EXPAND(ET_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__))
 
 // Internal only: Use ET_UNWRAP() instead.
-#define ET_INTERNAL_UNWRAP(...)                                         \
-  ET_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) \
-  (__VA_ARGS__)
+#define ET_INTERNAL_UNWRAP_EXPAND(x) x
 
 // Internal only: Use ET_UNWRAP() instead.
-#define ET_INTERNAL_UNWRAP_SELECT(                   \
-    _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) \
+#define ET_INTERNAL_UNWRAP_SELECT(                        \
+    _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, N, ...) \
   ET_INTERNAL_UNWRAP_##N
 
 // Internal only: Use ET_UNWRAP() instead.
-#define ET_INTERNAL_UNWRAP_1(result__) \
-  ({                                   \
-    auto et_result__ = (result__);     \
-    if (!et_result__.ok()) {           \
-      return et_result__.error();      \
-    }                                  \
-    std::move(*et_result__);           \
-  })
+#define ET_INTERNAL_UNWRAP_2(var__, result__)       \
+  auto et_unwrap_result_##var__ = (result__);       \
+  if (!et_unwrap_result_##var__.ok()) {             \
+    return et_unwrap_result_##var__.error();        \
+  }                                                 \
+  auto var__ = std::move(*et_unwrap_result_##var__)
 
 // Internal only: Use ET_UNWRAP() instead.
-#define ET_INTERNAL_UNWRAP_2(result__, message__, ...) \
-  ({                                                   \
-    auto et_result__ = (result__);                     \
-    if (!et_result__.ok()) {                           \
-      ET_LOG(Error, message__, ##__VA_ARGS__);         \
-      return et_result__.error();                      \
-    }                                                  \
-    std::move(*et_result__);                           \
-  })
+#define ET_INTERNAL_UNWRAP_3(var__, result__, message__, ...) \
+  auto et_unwrap_result_##var__ = (result__);                 \
+  if (!et_unwrap_result_##var__.ok()) {                       \
+    ET_LOG(Error, message__, ##__VA_ARGS__);                  \
+    return et_unwrap_result_##var__.error();                  \
+  }                                                           \
+  auto var__ = std::move(*et_unwrap_result_##var__)
 
 // Internal only: Use ET_UNWRAP() instead.
-#define ET_INTERNAL_UNWRAP_3 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_4 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_5 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_6 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_7 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_8 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_9 ET_INTERNAL_UNWRAP_2
-#define ET_INTERNAL_UNWRAP_10 ET_INTERNAL_UNWRAP_2
+#define ET_INTERNAL_UNWRAP_4  ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_5  ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_6  ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_7  ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_8  ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_9  ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_10 ET_INTERNAL_UNWRAP_3
+#define ET_INTERNAL_UNWRAP_11 ET_INTERNAL_UNWRAP_3