Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions backends/qualcomm/aot/wrappers/QuantizeParamsWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ class UndefinedQuantizeParamsWrapper final : public QuantizeParamsWrapper {
}

Qnn_QuantizeParams_t CreateQuantizeParams() override {
Qnn_QuantizeParams_t rval = {
.encodingDefinition = GetEncodingDefinition(),
.quantizationEncoding = GetQuantizationEncoding()};
// Value-initialize so that any member not assigned below is zeroed rather
// than left indeterminate when the struct is returned by value.
Qnn_QuantizeParams_t rval{};
rval.encodingDefinition = GetEncodingDefinition();
rval.quantizationEncoding = GetQuantizationEncoding();
return rval;
}
};
Expand Down
9 changes: 6 additions & 3 deletions backends/qualcomm/aot/wrappers/TensorWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,12 @@ class TensorWrapper {
std::unique_ptr<char[]> owned_data_;
bool created_{false};

Qnn_Tensor_t tensor_ = {
.version = QNN_TENSOR_VERSION_2,
.v2 = QNN_TENSOR_V2_INIT};
// Default state of the wrapped tensor: value-initialized, tagged as a
// version-2 tensor, with the v2 payload set from QNN_TENSOR_V2_INIT. Built
// through an immediately-invoked lambda so the fields can be assigned one by
// one (presumably to avoid designated initializers on toolchains that lack
// them -- confirm against the supported compiler list).
Qnn_Tensor_t tensor_ = []() noexcept -> Qnn_Tensor_t {
  Qnn_Tensor_t initial{};
  initial.version = QNN_TENSOR_VERSION_2;
  initial.v2 = QNN_TENSOR_V2_INIT;
  return initial;
}();
};
// base function for Create TensorWrapper
std::shared_ptr<TensorWrapper> CreateTensorWrapper(
Expand Down
20 changes: 13 additions & 7 deletions backends/qualcomm/runtime/QnnExecuTorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@
#define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity"
#define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"

// Symbol-visibility annotation for the public QNN ExecuTorch C API.
//   - MSVC, building the library (QNN_EXECUTORCH_BUILDING_DLL defined): export.
//   - MSVC, consuming the library: import.
//   - Any other compiler: default ELF/Mach-O visibility.
#if defined(_MSC_VER) && defined(QNN_EXECUTORCH_BUILDING_DLL)
#define QNN_EXECUTORCH_EXPORT __declspec(dllexport)
#elif defined(_MSC_VER)
#define QNN_EXECUTORCH_EXPORT __declspec(dllimport)
#else
#define QNN_EXECUTORCH_EXPORT __attribute__((__visibility__("default")))
#endif

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
Expand Down Expand Up @@ -68,18 +78,14 @@ struct CustomMemTensorInfo {
/// alignment as MemoryAllocator::kDefaultAlignment.
/// See runtime/core/memory_allocator.h. The function returns a valid pointer
/// if allocation is successful.
__attribute__((__visibility__("default"))) void* QnnExecuTorchAllocCustomMem(
size_t bytes,
size_t alignment);
QNN_EXECUTORCH_EXPORT void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment);

/// Add tensor to custom memory with custom type descriptor. Create memory
/// handle to tensor wrapper during execution
__attribute__((__visibility__("default"))) void
QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem);
QNN_EXECUTORCH_EXPORT void QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem);

/// Free the allocated shared memory.
__attribute__((__visibility__("default"))) void QnnExecuTorchFreeCustomMem(
void* buffer_ptr);
QNN_EXECUTORCH_EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr);

#ifdef __cplusplus
}
Expand Down
1 change: 1 addition & 0 deletions backends/qualcomm/runtime/QnnManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
Expand Down
4 changes: 2 additions & 2 deletions examples/models/llama/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ int32_t main(int32_t argc, char** argv) {
}
}
// generate
executorch::extension::llm::GenerationConfig config{
.temperature = temperature};
executorch::extension::llm::GenerationConfig config{};
config.temperature = temperature;

config.ignore_eos = FLAGS_ignore_eos;
config.num_bos = FLAGS_num_bos;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ Error AttentionSinkRopeRunner::load(
for (const std::string& method_name : method_names) {
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(method_name));
}
eviction_batch_size_ = ET_UNWRAP(module_->get("get_eviction_batch_size"))
.toScalar()
.to<int64_t>();
ET_UNWRAP(eviction_batch_size_evalue__, module_->get("get_eviction_batch_size"));
eviction_batch_size_ = eviction_batch_size_evalue__.toScalar().to<int64_t>();
return Error::Ok;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ Result<int64_t> LhdTokenGenerator<T>::generate(
shifted_pos++;

// print the token as string, decode it with the Tokenizer object
token_callback(
ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, this->tokenizer_->decode(prev_token, cur_token));
token_callback(decoded_token__);

// data-dependent terminating condition: we have n_eos_ number of EOS
if (this->eos_ids_->count(cur_token) > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ Result<int64_t> MultimodalLhdTokenGenerator<T>::generate(
pos++;

// print the token as string, decode it with the Tokenizer object
token_callback(
ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, this->tokenizer_->decode(prev_token, cur_token));
token_callback(decoded_token__);

// data-dependent terminating condition: we have n_eos_ number of EOS
if (this->eos_ids_->count(cur_token) > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ Error QNNMultimodalRunner<T>::load() {

ET_LOG(Info, "Reading metadata from model");
// retrieve any method meta, can be either prefill or kv
int64_t num_layers =
ET_UNWRAP(text_decoder_->get("get_n_layers")).toScalar().to<int64_t>();
ET_UNWRAP(num_layers_evalue__, text_decoder_->get("get_n_layers"));
int64_t num_layers = num_layers_evalue__.toScalar().to<int64_t>();

ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers");
// k_cache: [1, n_heads, head_dim, seq_len]
Expand Down Expand Up @@ -295,8 +295,9 @@ Error QNNMultimodalRunner<T>::load() {
// attention
int32_t sliding_window = context_len_;
if (text_decoder_->method_names()->count("get_sliding_window") > 0) {
ET_UNWRAP(sliding_window_evalue__, text_decoder_->get("get_sliding_window"));
sliding_window =
ET_UNWRAP(text_decoder_->get("get_sliding_window")).toInt();
sliding_window_evalue__.toInt();
}
kv_manager_ = std::make_unique<KVManager<T>>(typename KVManager<T>::Metadata{
context_len_,
Expand Down Expand Up @@ -522,8 +523,8 @@ executorch::runtime::Error QNNMultimodalRunner<T>::generate(
// print the first token from prefill. No prev_token so use cur_token for
// it.
if (token_callback) {
token_callback(
ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(cur_token, cur_token));
token_callback(decoded_token__);
}
ET_LOG(
Info,
Expand All @@ -533,7 +534,7 @@ executorch::runtime::Error QNNMultimodalRunner<T>::generate(
// start the main loop
prompt_tokens.push_back(cur_token);

int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate(
ET_UNWRAP(num_generated_tokens, token_generator_->generate(
prompt_tokens, cur_pos_, seq_len, token_callback, dump_logits, nullptr));
stats_.inference_end_ms = time_in_ms();
ET_LOG(
Expand Down
13 changes: 7 additions & 6 deletions examples/qualcomm/oss_scripts/llama/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,8 @@ Error Runner<T>::load() {

ET_LOG(Info, "Reading metadata from model");
// retrieve any method meta, can be either prefill or kv
int64_t num_layers =
ET_UNWRAP(module_->get("get_n_layers")).toScalar().to<int64_t>();
ET_UNWRAP(num_layers_evalue__, module_->get("get_n_layers"));
int64_t num_layers = num_layers_evalue__.toScalar().to<int64_t>();

ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers");
// k_cache: [1, n_heads, head_dim, seq_len]
Expand Down Expand Up @@ -273,7 +273,8 @@ Error Runner<T>::load() {
// attention
int32_t sliding_window = context_len_;
if (module_->method_names()->count("get_sliding_window") > 0) {
sliding_window = ET_UNWRAP(module_->get("get_sliding_window")).toInt();
ET_UNWRAP(sliding_window_evalue__, module_->get("get_sliding_window"));
sliding_window = sliding_window_evalue__.toInt();
}
kv_manager_ = std::make_unique<KVManager<T>>(typename KVManager<T>::Metadata{
context_len_,
Expand Down Expand Up @@ -457,8 +458,8 @@ Error Runner<T>::generate_from_prompt_or_file(
// print the first token from prefill. No prev_token so use cur_token for
// it.
if (token_callback) {
token_callback(
ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(cur_token, cur_token));
token_callback(decoded_token__);
}
ET_LOG(
Info,
Expand All @@ -467,7 +468,7 @@ Error Runner<T>::generate_from_prompt_or_file(

// start the main loop
prompt_tokens.push_back(cur_token);
int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate(
ET_UNWRAP(num_generated_tokens, token_generator_->generate(
prompt_tokens,
cur_pos_,
seq_len,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,8 @@ Result<int64_t> TokenGenerator<T>::generate(
pos++;

// print the token as string, decode it with the Tokenizer object
token_callback(
ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(prev_token, cur_token));
token_callback(decoded_token__);

// data-dependent terminating condition: we have n_eos_ number of EOS
if (eos_ids_->count(cur_token) > 0) {
Expand Down
1 change: 1 addition & 0 deletions examples/qualcomm/oss_scripts/llama/runner/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <cstddef>
#include <memory>
#include <vector>

// Template struct to hold tensor data and tensor
template <typename T>
Expand Down
4 changes: 2 additions & 2 deletions examples/qualcomm/oss_scripts/t5/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ Error Runner::generate(
output_token_ids.push_back(cur_token);

if (token_callback) {
token_callback(
ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(prev_token, cur_token));
token_callback(decoded_token__);
}
if (eos_ids_->count(cur_token) > 0) {
ET_LOG(Info, "\nReached to the end of generation");
Expand Down
4 changes: 2 additions & 2 deletions examples/qualcomm/oss_scripts/whisper/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ Error Runner::transcribe(
++pos;

if (token_callback) {
token_callback(
ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
ET_UNWRAP_TOKENIZER(decoded_token__, tokenizer_->decode(prev_token, cur_token));
token_callback(decoded_token__);
}
if (eos_ids_->count(cur_token) > 0) {
ET_LOG(Info, "\nReached to the end of generation");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ void KVCachedMemory::prepare_io(
i < thread_pool_.num_workers();
++i) {
lr_update_kv_.push_back(
{.start = i * range, .end = (i + 1) * range, .step = 1});
{i * range, (i + 1) * range, 1});
}
}
}
Expand Down
25 changes: 13 additions & 12 deletions extension/llm/runner/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,19 @@
#include <sys/resource.h>
#endif

#define ET_UNWRAP_TOKENIZER(result__) \
({ \
auto tk_result__ = (result__); \
if (!tk_result__.ok()) { \
ET_LOG( \
Error, \
"Tokenizers error code %d", \
static_cast<uint32_t>(tk_result__.error())); \
return ::executorch::runtime::Error::InvalidArgument; \
} \
std::move(*tk_result__); \
})
// Unwraps a tokenizer result into a new local variable named var__.
//
// result__ is evaluated exactly once. If it is not ok(), its error code is
// logged and the enclosing function returns Error::InvalidArgument; otherwise
// var__ is declared and move-initialized from the contained value.
//
// The internal result variable is named et_unwrap_result_##var__ rather than
// a fixed name so that multiple ET_UNWRAP_TOKENIZER calls in the same scope
// do not collide with each other.
//
// NOTE: this expands to several statements (two declarations and an `if`), so
// it must be used as a statement inside a braced block and never as the sole
// body of an unbraced if/for/while. The error code is cast to int so that the
// argument type matches the %d conversion in the log format.
#define ET_UNWRAP_TOKENIZER(var__, result__) \
  auto et_unwrap_result_##var__ = (result__); \
  if (!et_unwrap_result_##var__.ok()) { \
    ET_LOG( \
        Error, \
        "Tokenizers error code %d", \
        static_cast<int>(et_unwrap_result_##var__.error())); \
    return ::executorch::runtime::Error::InvalidArgument; \
  } \
  auto var__ = std::move(*et_unwrap_result_##var__);

#define ET_CHECK_TK_OK_OR_RETURN_ERROR(result__, ...) \
do { \
Expand Down
2 changes: 1 addition & 1 deletion runtime/core/exec_aten/util/scalar_type_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ struct promote_types {
#define ET_INTERNAL_SWITCH(TYPE, CONTEXT, NAME, ...) \
[&] { \
const auto& _st = TYPE; \
constexpr const char* et_switch_name = NAME; \
const char* et_switch_name = NAME; \
(void)et_switch_name; /* Suppress unused var */ \
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") \
switch (_st) { \
Expand Down
66 changes: 32 additions & 34 deletions runtime/core/result.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,54 +215,52 @@ using ::executorch::runtime::Result;
} // namespace torch

/**
* Unwrap a Result to obtain its value. If the Result contains an error,
* propogate the error via trivial function return.
* Unwrap a Result to obtain its value, declaring var__ in the current
* scope. If the Result contains an error, propagate the error via trivial
* function return.
*
* Note: A function using ET_UNWRAP should itself return a Result or Error.
*
* @param[in] var__ Name of the variable to declare and assign the unwrapped
* value to.
* @param[in] result__ Expression yielding the result to unwrap.
* @param[in] ... Optional format string for the log error message and its
* arguments.
* arguments.
*/
#define ET_UNWRAP(result__, ...) ET_INTERNAL_UNWRAP(result__, ##__VA_ARGS__)
#define ET_UNWRAP(...) \
ET_INTERNAL_UNWRAP_EXPAND(ET_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__))

// Internal only: Use ET_UNWRAP() instead.
#define ET_INTERNAL_UNWRAP(...) \
ET_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) \
(__VA_ARGS__)
// Internal only: identity macro that forces one additional expansion pass over
// its argument. NOTE(review): presumably needed for preprocessors that forward
// __VA_ARGS__ as a single argument to a nested macro (e.g. MSVC's traditional
// preprocessor) -- confirm against the supported toolchains.
#define ET_INTERNAL_UNWRAP_EXPAND(x) x

// Internal only: Use ET_UNWRAP() instead.
#define ET_INTERNAL_UNWRAP_SELECT( \
_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) \
#define ET_INTERNAL_UNWRAP_SELECT( \
_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, N, ...) \
ET_INTERNAL_UNWRAP_##N

// Internal only: Use ET_UNWRAP() instead.
#define ET_INTERNAL_UNWRAP_1(result__) \
({ \
auto et_result__ = (result__); \
if (!et_result__.ok()) { \
return et_result__.error(); \
} \
std::move(*et_result__); \
})
// Two-argument form (variable name, result expression). Evaluates result__
// once into a hidden local whose name is derived from var__, returns that
// result's error from the enclosing function when it is not ok(), and
// otherwise declares var__ move-initialized from the contained value.
// Expands to several statements with no trailing semicolon: invoke it as a
// full statement, terminated by `;`, inside a braced scope.
#define ET_INTERNAL_UNWRAP_2(var__, result__) \
auto et_unwrap_result_##var__ = (result__); \
if (!et_unwrap_result_##var__.ok()) { \
return et_unwrap_result_##var__.error(); \
} \
auto var__ = std::move(*et_unwrap_result_##var__)

// Internal only: Use ET_UNWRAP() instead.
#define ET_INTERNAL_UNWRAP_2(result__, message__, ...) \
({ \
auto et_result__ = (result__); \
if (!et_result__.ok()) { \
ET_LOG(Error, message__, ##__VA_ARGS__); \
return et_result__.error(); \
} \
std::move(*et_result__); \
})
// Logging form (variable name, result expression, format string, optional
// format arguments). Evaluates result__ once into a hidden local whose name is
// derived from var__; when it is not ok(), forwards message__ and any extra
// arguments to ET_LOG at Error level and then returns the result's error from
// the enclosing function. Otherwise declares var__ move-initialized from the
// contained value. Expands to several statements with no trailing semicolon:
// invoke it as a full statement, terminated by `;`, inside a braced scope.
#define ET_INTERNAL_UNWRAP_3(var__, result__, message__, ...) \
auto et_unwrap_result_##var__ = (result__); \
if (!et_unwrap_result_##var__.ok()) { \
ET_LOG(Error, message__, ##__VA_ARGS__); \
return et_unwrap_result_##var__.error(); \
} \
auto var__ = std::move(*et_unwrap_result_##var__)

// Internal only: Use ET_UNWRAP() instead.
#define ET_INTERNAL_UNWRAP_3 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_4 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_5 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_6 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_7 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_8 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_9 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_10 ET_INTERNAL_UNWRAP_2
#define ET_INTERNAL_UNWRAP_4 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_5 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_6 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_7 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_8 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_9 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_10 ET_INTERNAL_UNWRAP_3
#define ET_INTERNAL_UNWRAP_11 ET_INTERNAL_UNWRAP_3
Loading