Skip to content

Commit 102a9ea

Browse files
committed
refactor guidance params in lib
1 parent 6a9216b commit 102a9ea

File tree

3 files changed

+66
-77
lines changed

3 files changed

+66
-77
lines changed

examples/cli/main.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -948,11 +948,12 @@ int main(int argc, const char* argv[]) {
948948
params.style_ratio,
949949
params.normalize_input,
950950
params.input_id_images_path.c_str(),
951-
params.skip_layers.data(),
952-
params.skip_layers.size(),
953-
params.slg_scale,
954-
params.skip_layer_start,
955-
params.skip_layer_end);
951+
sd_slg_params_t{params.skip_layers.data(),
952+
params.skip_layers.size(),
953+
params.slg_scale,
954+
params.skip_layer_start,
955+
params.skip_layer_end},
956+
sd_apg_params_t{1, 0, 0});
956957
} else {
957958
sd_image_t input_image = {(uint32_t)params.width,
958959
(uint32_t)params.height,
@@ -1016,11 +1017,12 @@ int main(int argc, const char* argv[]) {
10161017
params.style_ratio,
10171018
params.normalize_input,
10181019
params.input_id_images_path.c_str(),
1019-
params.skip_layers.data(),
1020-
params.skip_layers.size(),
1021-
params.slg_scale,
1022-
params.skip_layer_start,
1023-
params.skip_layer_end);
1020+
sd_slg_params_t{params.skip_layers.data(),
1021+
params.skip_layers.size(),
1022+
params.slg_scale,
1023+
params.skip_layer_start,
1024+
params.skip_layer_end},
1025+
sd_apg_params_t{1, 0, 0});
10241026
}
10251027
}
10261028

@@ -1059,19 +1061,19 @@ int main(int argc, const char* argv[]) {
10591061

10601062
std::string dummy_name, ext, lc_ext;
10611063
bool is_jpg;
1062-
size_t last = params.output_path.find_last_of(".");
1064+
size_t last = params.output_path.find_last_of(".");
10631065
size_t last_path = std::min(params.output_path.find_last_of("/"),
10641066
params.output_path.find_last_of("\\"));
1065-
if (last != std::string::npos // filename has extension
1066-
&& (last_path == std::string::npos || last > last_path)) {
1067+
if (last != std::string::npos // filename has extension
1068+
&& (last_path == std::string::npos || last > last_path)) {
10671069
dummy_name = params.output_path.substr(0, last);
10681070
ext = lc_ext = params.output_path.substr(last);
10691071
std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
10701072
is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg" || lc_ext == ".jpe";
10711073
} else {
10721074
dummy_name = params.output_path;
10731075
ext = lc_ext = "";
1074-
is_jpg = false;
1076+
is_jpg = false;
10751077
}
10761078
// appending ".png" to absent or unknown extension
10771079
if (!is_jpg && lc_ext != ".png") {
@@ -1083,7 +1085,7 @@ int main(int argc, const char* argv[]) {
10831085
continue;
10841086
}
10851087
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
1086-
if(is_jpg) {
1088+
if (is_jpg) {
10871089
stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
10881090
results[i].data, 90, get_image_params(params, params.seed + i).c_str());
10891091
printf("save result JPEG image to '%s'\n", final_image_path.c_str());

stable-diffusion.cpp

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -796,11 +796,11 @@ class StableDiffusionGGML {
796796
const std::vector<float>& sigmas,
797797
int start_merge_step,
798798
SDCondition id_cond,
799-
std::vector<int> skip_layers = {},
800-
float slg_scale = 0,
801-
float skip_layer_start = 0.01,
802-
float skip_layer_end = 0.2,
803-
ggml_tensor* noise_mask = nullptr) {
799+
sd_slg_params_t slg_params = {NULL, 0, 0, 0, 0},
800+
sd_apg_params_t apg_params = {1, 0, 0},
801+
ggml_tensor* noise_mask = nullptr) {
802+
std::vector<int> skip_layers(slg_params.skip_layers, slg_params.skip_layers + slg_params.skip_layers_count);
803+
804804
LOG_DEBUG("Sample");
805805
struct ggml_init_params params;
806806
size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@@ -823,7 +823,7 @@ class StableDiffusionGGML {
823823
struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, noise);
824824

825825
bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL;
826-
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
826+
bool has_skiplayer = slg_params.scale != 0.0 && skip_layers.size() > 0;
827827

828828
// denoise wrapper
829829
struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);
@@ -843,13 +843,8 @@ class StableDiffusionGGML {
843843
}
844844
struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x);
845845

846-
// TODO do not hardcode
847-
float apg_eta = .08f;
848-
float apg_momentum = -.5f;
849-
float apg_norm_treshold = 15.0f;
850-
851846
std::vector<float> apg_momentum_buffer;
852-
if (apg_momentum != 0)
847+
if (apg_params.momentum != 0)
853848
apg_momentum_buffer.resize((size_t)ggml_nelements(denoised));
854849

855850
auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -932,7 +927,7 @@ class StableDiffusionGGML {
932927
}
933928

934929
int step_count = sigmas.size();
935-
bool is_skiplayer_step = has_skiplayer && step > (int)(skip_layer_start * step_count) && step < (int)(skip_layer_end * step_count);
930+
bool is_skiplayer_step = has_skiplayer && step > (int)(slg_params.skip_layer_start * step_count) && step < (int)(slg_params.skip_layer_end * step_count);
936931
float* skip_layer_data = NULL;
937932
if (is_skiplayer_step) {
938933
LOG_DEBUG("Skipping layers at step %d\n", step);
@@ -966,37 +961,37 @@ class StableDiffusionGGML {
966961
float dot = 0;
967962
for (int i = 0; i < ne_elements; i++) {
968963
float delta = positive_data[i] - negative_data[i];
969-
if (apg_momentum != 0) {
970-
delta += apg_momentum * apg_momentum_buffer[i];
964+
if (apg_params.momentum != 0) {
965+
delta += apg_params.momentum * apg_momentum_buffer[i];
971966
apg_momentum_buffer[i] = delta;
972967
}
973-
if (apg_norm_treshold > 0) {
968+
if (apg_params.norm_treshold > 0) {
974969
diff_norm += delta * delta;
975970
}
976-
if (apg_eta != 1.0f) {
971+
if (apg_params.eta != 1.0f) {
977972
cond_norm_sq += positive_data[i] * positive_data[i];
978973
dot += positive_data[i] * delta;
979974
}
980975
deltas[i] = delta;
981976
}
982-
if (apg_norm_treshold > 0) {
977+
if (apg_params.norm_treshold > 0) {
983978
diff_norm = std::sqrtf(diff_norm);
984-
apg_scale_factor = std::min(1.0f, apg_norm_treshold / diff_norm);
979+
apg_scale_factor = std::min(1.0f, apg_params.norm_treshold / diff_norm);
985980
}
986-
if (apg_eta != 1.0f) {
981+
if (apg_params.eta != 1.0f) {
987982
dot *= apg_scale_factor;
988983
// pre-normalize (avoids one square root and ne_elements extra divs)
989984
dot /= cond_norm_sq;
990985
}
991986

992987
for (int i = 0; i < ne_elements; i++) {
993988
deltas[i] *= apg_scale_factor;
994-
if (apg_eta != 1.0f) {
989+
if (apg_params.eta != 1.0f) {
995990
float apg_parallel = dot * positive_data[i];
996991
float apg_orthogonal = deltas[i] - apg_parallel;
997992

998993
// tweak deltas
999-
deltas[i] = apg_orthogonal + apg_eta * apg_parallel;
994+
deltas[i] = apg_orthogonal + apg_params.eta * apg_parallel;
1000995
}
1001996
}
1002997

@@ -1015,7 +1010,7 @@ class StableDiffusionGGML {
10151010
}
10161011
}
10171012
if (is_skiplayer_step) {
1018-
latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale;
1013+
latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_params.scale;
10191014
}
10201015
// v = latent_result, eps = latent_result
10211016
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
@@ -1260,11 +1255,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12601255
float style_ratio,
12611256
bool normalize_input,
12621257
std::string input_id_images_path,
1263-
std::vector<int> skip_layers = {},
1264-
float slg_scale = 0,
1265-
float skip_layer_start = 0.01,
1266-
float skip_layer_end = 0.2,
1267-
ggml_tensor* masked_image = NULL) {
1258+
sd_slg_params_t slg_params,
1259+
sd_apg_params_t apg_params,
1260+
ggml_tensor* masked_image = NULL) {
12681261
if (seed < 0) {
12691262
// Generally, when using the provided command line, the seed is always >0.
12701263
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1516,10 +1509,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15161509
sigmas,
15171510
start_merge_step,
15181511
id_cond,
1519-
skip_layers,
1520-
slg_scale,
1521-
skip_layer_start,
1522-
skip_layer_end,
1512+
slg_params,
1513+
apg_params,
15231514
noise_mask);
15241515

15251516
// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
@@ -1588,12 +1579,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
15881579
float style_ratio,
15891580
bool normalize_input,
15901581
const char* input_id_images_path_c_str,
1591-
int* skip_layers = NULL,
1592-
size_t skip_layers_count = 0,
1593-
float slg_scale = 0,
1594-
float skip_layer_start = 0.01,
1595-
float skip_layer_end = 0.2) {
1596-
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
1582+
sd_slg_params_t slg_params,
1583+
sd_apg_params_t apg_params) {
15971584
LOG_DEBUG("txt2img %dx%d", width, height);
15981585
if (sd_ctx == NULL) {
15991586
return NULL;
@@ -1666,10 +1653,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16661653
style_ratio,
16671654
normalize_input,
16681655
input_id_images_path_c_str,
1669-
skip_layers_vec,
1670-
slg_scale,
1671-
skip_layer_start,
1672-
skip_layer_end);
1656+
slg_params,
1657+
apg_params);
16731658

16741659
size_t t1 = ggml_time_ms();
16751660

@@ -1698,12 +1683,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
16981683
float style_ratio,
16991684
bool normalize_input,
17001685
const char* input_id_images_path_c_str,
1701-
int* skip_layers = NULL,
1702-
size_t skip_layers_count = 0,
1703-
float slg_scale = 0,
1704-
float skip_layer_start = 0.01,
1705-
float skip_layer_end = 0.2) {
1706-
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
1686+
sd_slg_params_t slg_params,
1687+
sd_apg_params_t apg_params) {
17071688
LOG_DEBUG("img2img %dx%d", width, height);
17081689
if (sd_ctx == NULL) {
17091690
return NULL;
@@ -1844,10 +1825,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18441825
style_ratio,
18451826
normalize_input,
18461827
input_id_images_path_c_str,
1847-
skip_layers_vec,
1848-
slg_scale,
1849-
skip_layer_start,
1850-
skip_layer_end,
1828+
slg_params,
1829+
apg_params,
18511830
masked_image);
18521831

18531832
size_t t2 = ggml_time_ms();

stable-diffusion.h

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ typedef struct {
122122
uint8_t* data;
123123
} sd_image_t;
124124

125+
// Parameters for the APG adjustment applied to the guidance delta
// (positive - negative prediction) inside the sampler's denoise step.
// The CLI and the sampler default both use {1, 0, 0}, for which every
// APG branch in the sampler is skipped.
typedef struct {
126+
float eta;  // weight of the delta component parallel to the positive prediction; 1.0 skips the projection
127+
float momentum;  // factor applied to the previous step's delta before it is added to the current one; 0 disables it
128+
float norm_treshold;  // when > 0, the delta is scaled by min(1, norm_treshold / ||delta||); <= 0 disables rescaling
129+
} sd_apg_params_t;
130+
131+
// Parameters for skip-layer guidance (SLG) as consumed by the sampler.
// SLG is only active when scale != 0 and skip_layers_count > 0.
typedef struct {
132+
int* skip_layers;  // array of skip_layers_count entries, copied into a std::vector by the sampler; presumably layer indices - confirm with the model code
133+
size_t skip_layers_count;  // number of entries readable through skip_layers
134+
float scale;  // multiplier on (positive - skip-layer prediction) added to the latent result; 0 disables SLG
135+
float skip_layer_start;  // fraction of the step count; SLG applies only to steps strictly greater than (int)(start * steps)
136+
float skip_layer_end;  // fraction of the step count; SLG applies only to steps strictly less than (int)(end * steps)
} sd_slg_params_t;
138+
125139
typedef struct sd_ctx_t sd_ctx_t;
126140

127141
SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
@@ -166,11 +180,8 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
166180
float style_strength,
167181
bool normalize_input,
168182
const char* input_id_images_path,
169-
int* skip_layers,
170-
size_t skip_layers_count,
171-
float slg_scale,
172-
float skip_layer_start,
173-
float skip_layer_end);
183+
sd_slg_params_t slg_params,
184+
sd_apg_params_t apg_params);
174185

175186
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
176187
sd_image_t init_image,
@@ -192,11 +203,8 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
192203
float style_strength,
193204
bool normalize_input,
194205
const char* input_id_images_path,
195-
int* skip_layers,
196-
size_t skip_layers_count,
197-
float slg_scale,
198-
float skip_layer_start,
199-
float skip_layer_end);
206+
sd_slg_params_t slg_params,
207+
sd_apg_params_t apg_params);
200208

201209
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
202210
sd_image_t init_image,

0 commit comments

Comments
 (0)