leejet
diff --git a/‎examples/CMakeLists.txt‎
Lines changed: 1 addition & 2 deletions b/‎examples/CMakeLists.txt‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/cli/main.cpp‎
Lines changed: 45 additions & 1 deletion b/‎examples/cli/main.cpp‎
Lines changed: 45 additions & 1 deletion
diff --git a/‎examples/imatrix/CMakeLists.txt‎
Lines changed: 0 additions & 7 deletions b/‎examples/imatrix/CMakeLists.txt‎
Lines changed: 0 additions & 7 deletions
@@ -1,4 +1,3 @@
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 
-add_subdirectory(cli)
-add_subdirectory(imatrix)
+add_subdirectory(cli)
@@ -24,6 +24,10 @@
 #define STB_IMAGE_RESIZE_STATIC
 #include "stb_image_resize.h"
 
+#define IMATRIX_IMPL
+#include "imatrix.hpp"
+static IMatrixCollector g_collector;
+
 #define SAFE_STR(s) ((s) ? (s) : "")
 #define BOOL_STR(b) ((b) ? "true" : "false")
 
@@ -109,6 +113,12 @@ struct SDParams {
     bool chroma_use_dit_mask = true;
     bool chroma_use_t5_mask  = false;
     int chroma_t5_mask_pad   = 1;
+
+    /* Imatrix params */
+
+    std::string imatrix_out = "";
+
+    std::vector<std::string> imatrix_in = {};
 };
 
 void print_params(SDParams params) {
@@ -193,6 +203,8 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --type [TYPE]                      weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n");
     printf("                                     If not specified, the default is the type of the weight file\n");
     printf("  --tensor-type-rules [EXPRESSION]   weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")\n");
+    printf("  --imat-out [PATH]                  If set, compute the imatrix for this run and save it to the provided path");
+    printf("  --imat-in [PATH]                   Use imatrix for quantization.");
     printf("  --lora-model-dir [DIR]             lora model directory\n");
     printf("  -i, --init-img [IMAGE]             path to the input image, required by img2img\n");
     printf("  --mask [MASK]                      path to the mask image, required by img2img with mask\n");
@@ -388,6 +400,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         {"-n", "--negative-prompt", "", &params.negative_prompt},
 
         {"", "--upscale-model", "", &params.esrgan_path},
+        {"", "--imat-out", "", &params.imatrix_out},
+
     };
 
     options.int_options = {
@@ -557,6 +571,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         return 1;
     };
 
+    auto on_imatrix_in_arg = [&](int argc, const char** argv, int index) {
+        if (++index >= argc) {
+            return -1;
+        }
+        params.imatrix_in.push_back(argv[index]);
+        return 1;
+    };
+
     options.manual_options = {
         {"-M", "--mode", "", on_mode_arg},
         {"", "--type", "", on_type_arg},
@@ -567,6 +589,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         {"", "--skip-layers", "", on_skip_layers_arg},
         {"-r", "--ref-image", "", on_ref_image_arg},
         {"-h", "--help", "", on_help_arg},
+        {"", "--imat-in", "", on_imatrix_in_arg},
+
     };
 
     if (!parse_options(argc, argv, options)) {
@@ -728,6 +752,10 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
     fflush(out_stream);
 }
 
+static bool collect_imatrix(struct ggml_tensor* t, bool ask, void* user_data) {
+    return g_collector.collect_imatrix(t, ask, user_data);
+}
+
 int main(int argc, const char* argv[]) {
     SDParams params;
 
@@ -752,8 +780,21 @@ int main(int argc, const char* argv[]) {
         printf("%s", sd_get_system_info());
     }
 
+    if (params.imatrix_out != "") {
+        sd_set_backend_eval_callback((sd_graph_eval_callback_t)collect_imatrix, &params);
+    }
+    if (params.imatrix_out != "" || params.mode == CONVERT || params.wtype != SD_TYPE_COUNT) {
+        setConvertImatrixCollector((void*)&g_collector);
+        for (const auto& in_file : params.imatrix_in) {
+            printf("loading imatrix from '%s'\n", in_file.c_str());
+            if (!g_collector.load_imatrix(in_file.c_str())) {
+                printf("Failed to load %s\n", in_file.c_str());
+            }
+        }
+    }
+
     if (params.mode == CONVERT) {
-        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype, params.tensor_type_rules.c_str(),NULL);
+        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype, params.tensor_type_rules.c_str());
         if (!success) {
             fprintf(stderr,
                     "convert '%s'/'%s' to '%s' failed\n",
@@ -1060,6 +1101,9 @@ int main(int argc, const char* argv[]) {
         free(results[i].data);
         results[i].data = NULL;
     }
+    if (params.imatrix_out != "") {
+        g_collector.save_imatrix(params.imatrix_out);
+    }
     free(results);
     free_sd_ctx(sd_ctx);
     free(control_image_buffer);