diff --git a/backends/qualcomm/aot/wrappers/QuantizeParamsWrapper.h b/backends/qualcomm/aot/wrappers/QuantizeParamsWrapper.h index 86d137723aa..f22f3dbf618 100644 --- a/backends/qualcomm/aot/wrappers/QuantizeParamsWrapper.h +++ b/backends/qualcomm/aot/wrappers/QuantizeParamsWrapper.h @@ -70,9 +70,9 @@ class UndefinedQuantizeParamsWrapper final : public QuantizeParamsWrapper { } Qnn_QuantizeParams_t CreateQuantizeParams() override { - Qnn_QuantizeParams_t rval = { - .encodingDefinition = GetEncodingDefinition(), - .quantizationEncoding = GetQuantizationEncoding()}; + Qnn_QuantizeParams_t rval; + rval.encodingDefinition = GetEncodingDefinition(); + rval.quantizationEncoding = GetQuantizationEncoding(); return rval; } }; diff --git a/backends/qualcomm/aot/wrappers/TensorWrapper.h b/backends/qualcomm/aot/wrappers/TensorWrapper.h index d8661acc492..98f59532afb 100644 --- a/backends/qualcomm/aot/wrappers/TensorWrapper.h +++ b/backends/qualcomm/aot/wrappers/TensorWrapper.h @@ -130,9 +130,12 @@ class TensorWrapper { std::unique_ptr owned_data_; bool created_{false}; - Qnn_Tensor_t tensor_ = { - .version = QNN_TENSOR_VERSION_2, - .v2 = QNN_TENSOR_V2_INIT}; + Qnn_Tensor_t tensor_ = []() noexcept { + Qnn_Tensor_t t{}; + t.version = QNN_TENSOR_VERSION_2; + t.v2 = QNN_TENSOR_V2_INIT; + return t; + }(); }; // base function for Create TensorWrapper std::shared_ptr CreateTensorWrapper( diff --git a/backends/qualcomm/runtime/QnnExecuTorch.h b/backends/qualcomm/runtime/QnnExecuTorch.h index 9699e5b4735..e046bbf6364 100644 --- a/backends/qualcomm/runtime/QnnExecuTorch.h +++ b/backends/qualcomm/runtime/QnnExecuTorch.h @@ -27,6 +27,16 @@ #define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection" #define QNN_RUNTIME_HEAP_PROFILING_PATH "qnn_runtime_heap_profiling_path" +#if defined(_MSC_VER) +#if defined(QNN_EXECUTORCH_BUILDING_DLL) +#define QNN_EXECUTORCH_EXPORT __declspec(dllexport) +#else +#define QNN_EXECUTORCH_EXPORT __declspec(dllimport) +#endif +#else +#define QNN_EXECUTORCH_EXPORT __attribute__((__visibility__("default"))) +#endif + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -69,18 +79,18 @@ struct CustomMemTensorInfo { /// alignment as MemoryAllocator::kDefaultAlignment. /// See runtime/core/memory_allocator.h. The function returns a valid pointer /// if allocation is successful. -__attribute__((__visibility__("default"))) void* QnnExecuTorchAllocCustomMem( +QNN_EXECUTORCH_EXPORT void* QnnExecuTorchAllocCustomMem( size_t bytes, size_t alignment); /// Add tensor to custom memory with custom type descriptor. Create memory /// handle to tensor wrapper during execution -__attribute__((__visibility__("default"))) void -QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem); +QNN_EXECUTORCH_EXPORT void QnnExecuTorchAddCustomMemTensorAddr( + void* tensor_addr, + void* custom_mem); /// Free the allocated shared memory. -__attribute__((__visibility__("default"))) void QnnExecuTorchFreeCustomMem( - void* buffer_ptr); +QNN_EXECUTORCH_EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr); #ifdef __cplusplus } diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index b1095ca3aac..a52be8bcd76 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/examples/models/llama/main.cpp b/examples/models/llama/main.cpp index 364efb2b7e8..cc83c890235 100644 --- a/examples/models/llama/main.cpp +++ b/examples/models/llama/main.cpp @@ -199,8 +199,8 @@ int32_t main(int32_t argc, char** argv) { } } // generate - executorch::extension::llm::GenerationConfig config{ - .temperature = temperature}; + executorch::extension::llm::GenerationConfig config{}; + config.temperature = temperature; config.ignore_eos = FLAGS_ignore_eos; config.num_bos = FLAGS_num_bos; diff --git a/examples/qualcomm/oss_scripts/llama/runner/attention_sink_rope_runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/attention_sink_rope_runner.cpp index 14fe3249486..ef187931953 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/attention_sink_rope_runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/attention_sink_rope_runner.cpp @@ -40,9 +40,9 @@ Error AttentionSinkRopeRunner::load( for (const std::string& method_name : method_names) { ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(method_name)); } - eviction_batch_size_ = ET_UNWRAP(module_->get("get_eviction_batch_size")) - .toScalar() - .to(); + ET_UNWRAP( + eviction_batch_size_evalue__, module_->get("get_eviction_batch_size")); + eviction_batch_size_ = eviction_batch_size_evalue__.toScalar().to(); return Error::Ok; } diff --git a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp index f7e44292f26..7a48217cec5 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp @@ -354,8 +354,9 @@ Result LhdTokenGenerator::generate( shifted_pos++; // print the token as string, decode it with the Tokenizer object - token_callback( - ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, this->tokenizer_->decode(prev_token, cur_token)); + token_callback(decoded_token__); // data-dependent terminating condition: we have n_eos_ number of EOS if (this->eos_ids_->count(cur_token) > 0) { diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp index 14a93104e1a..5f290920d83 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp @@ -340,8 +340,9 @@ Result MultimodalLhdTokenGenerator::generate( pos++; // print the token as string, decode it with the Tokenizer object - token_callback( - ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, this->tokenizer_->decode(prev_token, cur_token)); + token_callback(decoded_token__); // data-dependent terminating condition: we have n_eos_ number of EOS if (this->eos_ids_->count(cur_token) > 0) { diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp index 32e3baf27a9..09077117725 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp @@ -226,8 +226,8 @@ Error QNNMultimodalRunner::load() { ET_LOG(Info, "Reading metadata from model"); // retrieve any method meta, can be either prefill or kv - int64_t num_layers = - ET_UNWRAP(text_decoder_->get("get_n_layers")).toScalar().to(); + ET_UNWRAP(num_layers_evalue__, text_decoder_->get("get_n_layers")); + int64_t num_layers = num_layers_evalue__.toScalar().to(); ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers"); // k_cache: [1, n_heads, head_dim, seq_len] @@ -295,8 +295,9 @@ Error QNNMultimodalRunner::load() { // attention int32_t sliding_window = context_len_; if (text_decoder_->method_names()->count("get_sliding_window") > 0) { - sliding_window = - ET_UNWRAP(text_decoder_->get("get_sliding_window")).toInt(); + ET_UNWRAP( + sliding_window_evalue__, text_decoder_->get("get_sliding_window")); + sliding_window = sliding_window_evalue__.toInt(); } kv_manager_ = std::make_unique>(typename KVManager::Metadata{ context_len_, @@ -522,8 +523,9 @@ executorch::runtime::Error QNNMultimodalRunner::generate( // print the first token from prefill. No prev_token so use cur_token for // it. if (token_callback) { - token_callback( - ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, tokenizer_->decode(cur_token, cur_token)); + token_callback(decoded_token__); } ET_LOG( Info, @@ -533,8 +535,15 @@ executorch::runtime::Error QNNMultimodalRunner::generate( // start the main loop prompt_tokens.push_back(cur_token); - int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate( - prompt_tokens, cur_pos_, seq_len, token_callback, dump_logits, nullptr)); + ET_UNWRAP( + num_generated_tokens, + token_generator_->generate( + prompt_tokens, + cur_pos_, + seq_len, + token_callback, + dump_logits, + nullptr)); stats_.inference_end_ms = time_in_ms(); ET_LOG( Info, diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index 0a4a8b9abb5..74644a75ad1 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -230,8 +230,8 @@ Error Runner::load() { ET_LOG(Info, "Reading metadata from model"); // retrieve any method meta, can be either prefill or kv - int64_t num_layers = - ET_UNWRAP(module_->get("get_n_layers")).toScalar().to(); + ET_UNWRAP(num_layers_evalue__, module_->get("get_n_layers")); + int64_t num_layers = num_layers_evalue__.toScalar().to(); ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers"); // k_cache: [1, n_heads, head_dim, seq_len] @@ -273,7 +273,8 @@ Error Runner::load() { // attention int32_t sliding_window = context_len_; if (module_->method_names()->count("get_sliding_window") > 0) { - sliding_window = ET_UNWRAP(module_->get("get_sliding_window")).toInt(); + ET_UNWRAP(sliding_window_evalue__, module_->get("get_sliding_window")); + sliding_window = sliding_window_evalue__.toInt(); } kv_manager_ = std::make_unique>(typename KVManager::Metadata{ context_len_, @@ -457,8 +458,9 @@ Error Runner::generate_from_prompt_or_file( // print the first token from prefill. No prev_token so use cur_token for // it. if (token_callback) { - token_callback( - ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, tokenizer_->decode(cur_token, cur_token)); + token_callback(decoded_token__); } ET_LOG( Info, @@ -467,13 +469,15 @@ Error Runner::generate_from_prompt_or_file( // start the main loop prompt_tokens.push_back(cur_token); - int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate( - prompt_tokens, - cur_pos_, - seq_len, - token_callback, - dump_logits, - attention_sink_rope_runner_.get())); + ET_UNWRAP( + num_generated_tokens, + token_generator_->generate( + prompt_tokens, + cur_pos_, + seq_len, + token_callback, + dump_logits, + attention_sink_rope_runner_.get())); stats_.inference_end_ms = time_in_ms(); ET_LOG( Info, diff --git a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp index 8ab82d932e1..48e3d7d6510 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp @@ -336,8 +336,9 @@ Result TokenGenerator::generate( pos++; // print the token as string, decode it with the Tokenizer object - token_callback( - ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, tokenizer_->decode(prev_token, cur_token)); + token_callback(decoded_token__); // data-dependent terminating condition: we have n_eos_ number of EOS if (eos_ids_->count(cur_token) > 0) { diff --git a/examples/qualcomm/oss_scripts/llama/runner/utils.h b/examples/qualcomm/oss_scripts/llama/runner/utils.h index bef6b1a2017..576163c5995 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/utils.h +++ b/examples/qualcomm/oss_scripts/llama/runner/utils.h @@ -10,6 +10,7 @@ #include #include #include +#include // Template struct to hold tensor data and tensor template diff --git a/examples/qualcomm/oss_scripts/t5/runner/runner.cpp b/examples/qualcomm/oss_scripts/t5/runner/runner.cpp index 8f678325734..d687d6138c5 100644 --- a/examples/qualcomm/oss_scripts/t5/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/t5/runner/runner.cpp @@ -180,8 +180,9 @@ Error Runner::generate( output_token_ids.push_back(cur_token); if (token_callback) { - token_callback( - ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, tokenizer_->decode(prev_token, cur_token)); + token_callback(decoded_token__); } if (eos_ids_->count(cur_token) > 0) { ET_LOG(Info, "\nReached to the end of generation"); diff --git a/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp b/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp index c98326778bf..fcbbfd6a973 100644 --- a/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp @@ -171,8 +171,9 @@ Error Runner::transcribe( ++pos; if (token_callback) { - token_callback( - ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token))); + ET_UNWRAP_TOKENIZER( + decoded_token__, tokenizer_->decode(prev_token, cur_token)); + token_callback(decoded_token__); } if (eos_ids_->count(cur_token) > 0) { ET_LOG(Info, "\nReached to the end of generation"); diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp index 9ee7551650a..8dd6206367d 100644 --- a/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp +++ b/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp @@ -400,8 +400,7 @@ void KVCachedMemory::prepare_io( for (int i = 0, range = 1024 / thread_pool_.num_workers(); i < thread_pool_.num_workers(); ++i) { - lr_update_kv_.push_back( - {.start = i * range, .end = (i + 1) * range, .step = 1}); + lr_update_kv_.push_back({i * range, (i + 1) * range, 1}); } } } diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h index 6bfde46eda0..972443ee13d 100644 --- a/extension/llm/runner/util.h +++ b/extension/llm/runner/util.h @@ -19,18 +19,19 @@ #include #endif -#define ET_UNWRAP_TOKENIZER(result__) \ - ({ \ - auto tk_result__ = (result__); \ - if (!tk_result__.ok()) { \ - ET_LOG( \ - Error, \ - "Tokenizers error code %d", \ - static_cast(tk_result__.error())); \ - return ::executorch::runtime::Error::InvalidArgument; \ - } \ - std::move(*tk_result__); \ - }) +// The internal result variable is named et_unwrap_result_##var__ rather than +// a fixed name so that multiple ET_UNWRAP_TOKENIZER calls in the same scope +// do not collide with each other. +#define ET_UNWRAP_TOKENIZER(var__, result__) \ + auto et_unwrap_result_##var__ = (result__); \ + if (!et_unwrap_result_##var__.ok()) { \ + ET_LOG( \ + Error, \ + "Tokenizers error code %d", \ + static_cast(et_unwrap_result_##var__.error())); \ + return ::executorch::runtime::Error::InvalidArgument; \ + } \ + auto var__ = std::move(*et_unwrap_result_##var__); #define ET_CHECK_TK_OK_OR_RETURN_ERROR(result__, ...) \ do { \ diff --git a/runtime/core/exec_aten/util/scalar_type_util.h b/runtime/core/exec_aten/util/scalar_type_util.h index 4470d39173a..f48b50a0786 100644 --- a/runtime/core/exec_aten/util/scalar_type_util.h +++ b/runtime/core/exec_aten/util/scalar_type_util.h @@ -916,7 +916,7 @@ struct promote_types { #define ET_INTERNAL_SWITCH(TYPE, CONTEXT, NAME, ...) \ [&] { \ const auto& _st = TYPE; \ - constexpr const char* et_switch_name = NAME; \ + const char* et_switch_name = NAME; \ (void)et_switch_name; /* Suppress unused var */ \ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") \ switch (_st) { \ diff --git a/runtime/core/result.h b/runtime/core/result.h index 377573e6dfa..233d7513a64 100644 --- a/runtime/core/result.h +++ b/runtime/core/result.h @@ -215,54 +215,53 @@ using ::executorch::runtime::Result; } // namespace torch /** - * Unwrap a Result to obtain its value. If the Result contains an error, - * propogate the error via trivial function return. + * Unwrap a Result to obtain its value, declaring var__ in the current + * scope. If the Result contains an error, propagate the error via trivial + * function return. * * Note: A function using ET_UNWRAP should itself return a Result or Error. * + * @param[in] var__ Name of the variable to declare and assign the unwrapped + * value to. * @param[in] result__ Expression yielding the result to unwrap. * @param[in] ... Optional format string for the log error message and its - * arguments. + * arguments. */ -#define ET_UNWRAP(result__, ...) ET_INTERNAL_UNWRAP(result__, ##__VA_ARGS__) +#define ET_UNWRAP(...) \ + ET_INTERNAL_UNWRAP_EXPAND(ET_INTERNAL_UNWRAP_SELECT( \ + __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)) // Internal only: Use ET_UNWRAP() instead. -#define ET_INTERNAL_UNWRAP(...) \ - ET_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) \ - (__VA_ARGS__) +#define ET_INTERNAL_UNWRAP_EXPAND(x) x // Internal only: Use ET_UNWRAP() instead. -#define ET_INTERNAL_UNWRAP_SELECT( \ - _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) \ +#define ET_INTERNAL_UNWRAP_SELECT( \ + _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, N, ...) \ ET_INTERNAL_UNWRAP_##N // Internal only: Use ET_UNWRAP() instead. -#define ET_INTERNAL_UNWRAP_1(result__) \ - ({ \ - auto et_result__ = (result__); \ - if (!et_result__.ok()) { \ - return et_result__.error(); \ - } \ - std::move(*et_result__); \ - }) +#define ET_INTERNAL_UNWRAP_2(var__, result__) \ + auto et_unwrap_result_##var__ = (result__); \ + if (!et_unwrap_result_##var__.ok()) { \ + return et_unwrap_result_##var__.error(); \ + } \ + auto var__ = std::move(*et_unwrap_result_##var__) // Internal only: Use ET_UNWRAP() instead. -#define ET_INTERNAL_UNWRAP_2(result__, message__, ...) \ - ({ \ - auto et_result__ = (result__); \ - if (!et_result__.ok()) { \ - ET_LOG(Error, message__, ##__VA_ARGS__); \ - return et_result__.error(); \ - } \ - std::move(*et_result__); \ - }) +#define ET_INTERNAL_UNWRAP_3(var__, result__, message__, ...) \ + auto et_unwrap_result_##var__ = (result__); \ + if (!et_unwrap_result_##var__.ok()) { \ + ET_LOG(Error, message__, ##__VA_ARGS__); \ + return et_unwrap_result_##var__.error(); \ + } \ + auto var__ = std::move(*et_unwrap_result_##var__) // Internal only: Use ET_UNWRAP() instead. -#define ET_INTERNAL_UNWRAP_3 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_4 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_5 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_6 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_7 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_8 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_9 ET_INTERNAL_UNWRAP_2 -#define ET_INTERNAL_UNWRAP_10 ET_INTERNAL_UNWRAP_2 +#define ET_INTERNAL_UNWRAP_4 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_5 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_6 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_7 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_8 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_9 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_10 ET_INTERNAL_UNWRAP_3 +#define ET_INTERNAL_UNWRAP_11 ET_INTERNAL_UNWRAP_3