@@ -796,11 +796,11 @@ class StableDiffusionGGML {
796796 const std::vector<float >& sigmas,
797797 int start_merge_step,
798798 SDCondition id_cond,
799- std::vector< int > skip_layers = {},
800- float slg_scale = 0 ,
801- float skip_layer_start = 0.01 ,
802- float skip_layer_end = 0.2 ,
803- ggml_tensor* noise_mask = nullptr ) {
799+ sd_slg_params_t slg_params = {NULL , 0 , 0 , 0 , 0 },
800+ sd_apg_params_t apg_params = { 1 , 0 , 0 } ,
801+ ggml_tensor* noise_mask = nullptr ) {
802+ std::vector< int > skip_layers (slg_params. skip_layers , slg_params. skip_layers + slg_params. skip_layers_count );
803+
804804 LOG_DEBUG (" Sample" );
805805 struct ggml_init_params params;
806806 size_t data_size = ggml_row_size (init_latent->type , init_latent->ne [0 ]);
@@ -823,7 +823,7 @@ class StableDiffusionGGML {
823823 struct ggml_tensor * noised_input = ggml_dup_tensor (work_ctx, noise);
824824
825825 bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
826- bool has_skiplayer = slg_scale != 0.0 && skip_layers.size () > 0 ;
826+ bool has_skiplayer = slg_params. scale != 0.0 && skip_layers.size () > 0 ;
827827
828828 // denoise wrapper
829829 struct ggml_tensor * out_cond = ggml_dup_tensor (work_ctx, x);
@@ -843,13 +843,8 @@ class StableDiffusionGGML {
843843 }
844844 struct ggml_tensor * denoised = ggml_dup_tensor (work_ctx, x);
845845
846- // TODO do not hardcode
847- float apg_eta = .08f ;
848- float apg_momentum = -.5f ;
849- float apg_norm_treshold = 15 .0f ;
850-
851846 std::vector<float > apg_momentum_buffer;
852- if (apg_momentum != 0 )
847+ if (apg_params. momentum != 0 )
853848 apg_momentum_buffer.resize ((size_t )ggml_nelements (denoised));
854849
855850 auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -932,7 +927,7 @@ class StableDiffusionGGML {
932927 }
933928
934929 int step_count = sigmas.size ();
935- bool is_skiplayer_step = has_skiplayer && step > (int )(skip_layer_start * step_count) && step < (int )(skip_layer_end * step_count);
930+ bool is_skiplayer_step = has_skiplayer && step > (int )(slg_params. skip_layer_start * step_count) && step < (int )(slg_params. skip_layer_end * step_count);
936931 float * skip_layer_data = NULL ;
937932 if (is_skiplayer_step) {
938933 LOG_DEBUG (" Skipping layers at step %d\n " , step);
@@ -966,37 +961,37 @@ class StableDiffusionGGML {
966961 float dot = 0 ;
967962 for (int i = 0 ; i < ne_elements; i++) {
968963 float delta = positive_data[i] - negative_data[i];
969- if (apg_momentum != 0 ) {
970- delta += apg_momentum * apg_momentum_buffer[i];
964+ if (apg_params. momentum != 0 ) {
965+ delta += apg_params. momentum * apg_momentum_buffer[i];
971966 apg_momentum_buffer[i] = delta;
972967 }
973- if (apg_norm_treshold > 0 ) {
968+ if (apg_params. norm_treshold > 0 ) {
974969 diff_norm += delta * delta;
975970 }
976- if (apg_eta != 1 .0f ) {
971+ if (apg_params. eta != 1 .0f ) {
977972 cond_norm_sq += positive_data[i] * positive_data[i];
978973 dot += positive_data[i] * delta;
979974 }
980975 deltas[i] = delta;
981976 }
982- if (apg_norm_treshold > 0 ) {
977+ if (apg_params. norm_treshold > 0 ) {
983978 diff_norm = std::sqrtf (diff_norm);
984- apg_scale_factor = std::min (1 .0f , apg_norm_treshold / diff_norm);
979+ apg_scale_factor = std::min (1 .0f , apg_params. norm_treshold / diff_norm);
985980 }
986- if (apg_eta != 1 .0f ) {
981+ if (apg_params. eta != 1 .0f ) {
987982 dot *= apg_scale_factor;
988983 // pre-normalize (avoids one square root and ne_elements extra divs)
989984 dot /= cond_norm_sq;
990985 }
991986
992987 for (int i = 0 ; i < ne_elements; i++) {
993988 deltas[i] *= apg_scale_factor;
994- if (apg_eta != 1 .0f ) {
989+ if (apg_params. eta != 1 .0f ) {
995990 float apg_parallel = dot * positive_data[i];
996991 float apg_orthogonal = deltas[i] - apg_parallel;
997992
998993 // tweak deltas
999- deltas[i] = apg_orthogonal + apg_eta * apg_parallel;
994+ deltas[i] = apg_orthogonal + apg_params. eta * apg_parallel;
1000995 }
1001996 }
1002997
@@ -1015,7 +1010,7 @@ class StableDiffusionGGML {
10151010 }
10161011 }
10171012 if (is_skiplayer_step) {
1018- latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale ;
1013+ latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_params. scale ;
10191014 }
10201015 // v = latent_result, eps = latent_result
10211016 // denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
@@ -1260,11 +1255,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12601255 float style_ratio,
12611256 bool normalize_input,
12621257 std::string input_id_images_path,
1263- std::vector<int > skip_layers = {},
1264- float slg_scale = 0 ,
1265- float skip_layer_start = 0.01 ,
1266- float skip_layer_end = 0.2 ,
1267- ggml_tensor* masked_image = NULL ) {
1258+ sd_slg_params_t slg_params,
1259+ sd_apg_params_t apg_params,
1260+ ggml_tensor* masked_image = NULL ) {
12681261 if (seed < 0 ) {
12691262 // Generally, when using the provided command line, the seed is always >0.
12701263 // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1516,10 +1509,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15161509 sigmas,
15171510 start_merge_step,
15181511 id_cond,
1519- skip_layers,
1520- slg_scale,
1521- skip_layer_start,
1522- skip_layer_end,
1512+ slg_params,
1513+ apg_params,
15231514 noise_mask);
15241515
15251516 // struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
@@ -1588,12 +1579,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
15881579 float style_ratio,
15891580 bool normalize_input,
15901581 const char * input_id_images_path_c_str,
1591- int * skip_layers = NULL ,
1592- size_t skip_layers_count = 0 ,
1593- float slg_scale = 0 ,
1594- float skip_layer_start = 0.01 ,
1595- float skip_layer_end = 0.2 ) {
1596- std::vector<int > skip_layers_vec (skip_layers, skip_layers + skip_layers_count);
1582+ sd_slg_params_t slg_params,
1583+ sd_apg_params_t apg_params) {
15971584 LOG_DEBUG (" txt2img %dx%d" , width, height);
15981585 if (sd_ctx == NULL ) {
15991586 return NULL ;
@@ -1666,10 +1653,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16661653 style_ratio,
16671654 normalize_input,
16681655 input_id_images_path_c_str,
1669- skip_layers_vec,
1670- slg_scale,
1671- skip_layer_start,
1672- skip_layer_end);
1656+ slg_params,
1657+ apg_params);
16731658
16741659 size_t t1 = ggml_time_ms ();
16751660
@@ -1698,12 +1683,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
16981683 float style_ratio,
16991684 bool normalize_input,
17001685 const char * input_id_images_path_c_str,
1701- int * skip_layers = NULL ,
1702- size_t skip_layers_count = 0 ,
1703- float slg_scale = 0 ,
1704- float skip_layer_start = 0.01 ,
1705- float skip_layer_end = 0.2 ) {
1706- std::vector<int > skip_layers_vec (skip_layers, skip_layers + skip_layers_count);
1686+ sd_slg_params_t slg_params,
1687+ sd_apg_params_t apg_params) {
17071688 LOG_DEBUG (" img2img %dx%d" , width, height);
17081689 if (sd_ctx == NULL ) {
17091690 return NULL ;
@@ -1844,10 +1825,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18441825 style_ratio,
18451826 normalize_input,
18461827 input_id_images_path_c_str,
1847- skip_layers_vec,
1848- slg_scale,
1849- skip_layer_start,
1850- skip_layer_end,
1828+ slg_params,
1829+ apg_params,
18511830 masked_image);
18521831
18531832 size_t t2 = ggml_time_ms ();
0 commit comments