Merge leejet#1222 (improve handling of VAE decode failures)

roj234 · roj234 · commit e7473958a5f0 · 2026-02-04T04:55:50.000+08:00
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -409,7 +409,7 @@ bool save_results(const SDCliParams& cli_params,
     auto write_image = [&](const fs::path& path, int idx) {
         const sd_image_t& img = results[idx];
         if (!img.data)
-            return;
+            return false;
 
         std::string params = get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + idx);
         int ok             = 0;
@@ -419,8 +419,11 @@ bool save_results(const SDCliParams& cli_params,
             ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.c_str());
         }
         LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
+        return ok != 0;
     };
 
+    int successful_results = 0;  // number of images write_image() reported as written
+
     if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
         if (!is_jpg && ext_lower != ".png")
             ext = ".png";
@@ -429,19 +432,26 @@ bool save_results(const SDCliParams& cli_params,
 
         for (int i = 0; i < num_results; ++i) {
             fs::path img_path = format_frame_idx(pattern.string(), output_begin_idx + i);
-            write_image(img_path, i);
+            if (write_image(img_path, i)) {
+                successful_results++;
+            }
         }
-        return true;
+        LOG_INFO("%d/%d images saved", successful_results, num_results);
+        return successful_results != 0;  // fail only when not a single image was written
     }
 
     if (cli_params.mode == VID_GEN && num_results > 1) {
         if (ext_lower != ".avi")
             ext = ".avi";
         fs::path video_path = base_path;
         video_path += ext;
-        create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps);
-        LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
-        return true;
+        if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {  // 0 is treated as success
+            LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
+            return true;
+        } else {
+            LOG_ERROR("Failed to save result MJPG AVI video to '%s'", video_path.string().c_str());
+            return false;
+        }
     }
 
     if (!is_jpg && ext_lower != ".png")
@@ -453,10 +463,12 @@ bool save_results(const SDCliParams& cli_params,
             img_path += "_" + std::to_string(output_begin_idx + i);
         }
         img_path += ext;
-        write_image(img_path, i);
+        if (write_image(img_path, i)) {
+            successful_results++;
+        }
     }
-
-    return true;
+    LOG_INFO("%d/%d images saved", successful_results, num_results);
+    return successful_results != 0;  // fail only when not a single image was written
 }
 }
 
 int main(int argc, const char* argv[]) {
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
@@ -767,7 +767,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_silu_act(ggml_context* ctx, ggml_tensor*
     return x;
 }
 
-typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
+typedef std::function<bool(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;  // returns false when a tile could not be processed; sd_tiling_non_square then skips the merge for that tile and logs an error
 
 __STATIC_INLINE__ void sd_tiling_calc_tiles(int& num_tiles_dim,
                                             float& tile_overlap_factor_dim,
@@ -918,12 +918,15 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
 
             int64_t t1 = ggml_time_ms();
             ggml_ext_tensor_split_2d(input, input_tile, x_in, y_in);
-            on_processing(input_tile, output_tile, false);
-            ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy);
+            if (on_processing(input_tile, output_tile, false)) {  // merge and report progress only for tiles the callback processed successfully
+                ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy);
 
-            int64_t t2 = ggml_time_ms();
-            last_time  = (t2 - t1) / 1000.0f;
-            pretty_progress(tile_count, num_tiles, last_time);
+                int64_t t2 = ggml_time_ms();
+                last_time  = (t2 - t1) / 1000.0f;
+                pretty_progress(tile_count, num_tiles, last_time);
+            } else {  // callback reported failure: nothing is merged for this tile and no timing/progress update is made
+                LOG_ERROR("Failed to process patch %d at (%d, %d)", tile_count, x, y);
+            }
             tile_count++;
         }
         last_x = false;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -1558,7 +1558,7 @@ class StableDiffusionGGML {
                 if (vae_tiling_params.enabled) {
                     // split latent in 32x32 tiles and compute in several steps
                     auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-                        first_stage_model->compute(n_threads, in, true, &out, nullptr);
+                        return first_stage_model->compute(n_threads, in, true, &out, nullptr);
                     };
                     silent_tiling(latents, result, get_vae_scale_factor(), 32, 0.5f, on_tiling);
 
@@ -1577,7 +1577,7 @@ class StableDiffusionGGML {
                 if (vae_tiling_params.enabled) {
                     // split latent in 64x64 tiles and compute in several steps
                     auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-                        tae_first_stage->compute(n_threads, in, true, &out, nullptr);
+                        return tae_first_stage->compute(n_threads, in, true, &out, nullptr);
                     };
                     silent_tiling(latents, result, get_vae_scale_factor(), 64, 0.5f, on_tiling);
                 } else {
@@ -2546,7 +2546,7 @@ class StableDiffusionGGML {
                 LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
 
                 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-                    first_stage_model->compute(n_threads, in, false, &out, work_ctx);
+                    return first_stage_model->compute(n_threads, in, false, &out, work_ctx);
                 };
                 sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling);
             } else {
@@ -2557,7 +2557,7 @@ class StableDiffusionGGML {
             if (vae_tiling_params.enabled && !encode_video) {
                 // split latent in 32x32 tiles and compute in several steps
                 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-                    tae_first_stage->compute(n_threads, in, false, &out, nullptr);
+                    return tae_first_stage->compute(n_threads, in, false, &out, nullptr);
                 };
                 sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling);
             } else {
@@ -2675,23 +2675,31 @@ class StableDiffusionGGML {
 
                 // split latent in 32x32 tiles and compute in several steps
                 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-                    first_stage_model->compute(n_threads, in, true, &out, nullptr);
+                    return first_stage_model->compute(n_threads, in, true, &out, nullptr);
                 };
                 sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling);
             } else {
-                first_stage_model->compute(n_threads, x, true, &result, work_ctx);
+                if (!first_stage_model->compute(n_threads, x, true, &result, work_ctx)) {  // non-tiled decode failed
+                    LOG_ERROR("Failed to decode latents");
+                    first_stage_model->free_compute_buffer();  // release the compute buffer before bailing out, as the success path does
+                    return nullptr;
+                }
             }
             first_stage_model->free_compute_buffer();
             process_vae_output_tensor(result);
         } else {
             if (vae_tiling_params.enabled) {
                 // split latent in 64x64 tiles and compute in several steps
                 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-                    tae_first_stage->compute(n_threads, in, true, &out);
+                    return tae_first_stage->compute(n_threads, in, true, &out);
                 };
                 sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling);
             } else {
-                tae_first_stage->compute(n_threads, x, true, &result);
+                if (!tae_first_stage->compute(n_threads, x, true, &result)) {  // non-tiled decode failed
+                    LOG_ERROR("Failed to decode latents");
+                    tae_first_stage->free_compute_buffer();  // release the compute buffer before bailing out, as the success path does
+                    return nullptr;
+                }
             }
             tae_first_stage->free_compute_buffer();
         }
@@ -3461,6 +3469,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
         ggml_free(work_ctx);
         return nullptr;
     }
+    memset(result_images, 0, batch_count * sizeof(sd_image_t));  // clear all batch_count slots up front; the loop below only fills decoded_images.size() of them (presumably so unfilled slots read as data == nullptr — confirm)
 
     for (size_t i = 0; i < decoded_images.size(); i++) {
         result_images[i].width   = width;
diff --git a/upscaler.cpp b/upscaler.cpp
@@ -89,7 +89,7 @@ struct UpscalerGGML {
 
         ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
         auto on_tiling        = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
-            esrgan_upscaler->compute(n_threads, in, &out);
+            return esrgan_upscaler->compute(n_threads, in, &out);
         };
         int64_t t0 = ggml_time_ms();
         sd_tiling(input_image_tensor, upscaled, esrgan_upscaler->scale, esrgan_upscaler->tile_size, 0.25f, on_tiling);