Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,15 @@ Generation Options:
medium
--skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
Expand Down
4 changes: 2 additions & 2 deletions examples/common/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ struct SDGenerationParams {
&sample_params.guidance.slg.layer_end},
{"",
"--eta",
"eta in DDIM, only for DDIM and TCD (default: 0)",
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
&sample_params.eta},
{"",
"--flow-shift",
Expand Down Expand Up @@ -1163,7 +1163,7 @@ struct SDGenerationParams {
&high_noise_sample_params.guidance.slg.layer_end},
{"",
"--high-noise-eta",
"(high noise) eta in DDIM, only for DDIM and TCD (default: 0)",
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
&high_noise_sample_params.eta},
{"",
"--strength",
Expand Down
4 changes: 2 additions & 2 deletions examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,15 @@ Default Generation Options:
medium
--skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
Expand Down
66 changes: 59 additions & 7 deletions src/denoiser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,8 @@ static std::pair<float, float> get_ancestral_step(float sigma_from,
static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng) {
std::shared_ptr<RNG> rng,
float eta) {
int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) {
float sigma = sigmas[i];
Expand All @@ -799,7 +800,7 @@ static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
}
sd::Tensor<float> denoised = std::move(denoised_opt);
sd::Tensor<float> d = (x - denoised) / sigma;
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1]);
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
x += d * (sigma_down - sigmas[i]);
if (sigmas[i + 1] > 0) {
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
Expand All @@ -808,6 +809,52 @@ static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
return x;
}

/**
 * Euler sampler for flow denoisers with an optional ancestral (re-noising) step.
 *
 * Each step asks `model` for the denoised prediction at the current sigma and
 * moves `x` toward the next sigma. With eta == 0 the step is a plain
 * interpolation between `x` and the prediction; with eta != 0 the step goes
 * down to a lower `sigma_down`, then rescales `x` and adds fresh noise drawn
 * from `rng`.
 *
 * @param model   Denoising callback, invoked as model(x, sigma, step) with a 1-based step.
 * @param x       Starting latent; taken by value and updated in place.
 * @param sigmas  Noise schedule; sigmas.size() - 1 steps are performed.
 * @param rng     Random source for the injected noise.
 * @param eta     Noise multiplier. A non-finite value is treated as "unset" and
 *                replaced by 1, the documented default for euler_a.
 * @return The final latent, or an empty tensor if the model callback returns an
 *         empty result.
 */
static sd::Tensor<float> sample_euler_flow(denoise_cb_t model,
                                           sd::Tensor<float> x,
                                           const std::vector<float>& sigmas,
                                           std::shared_ptr<RNG> rng,
                                           float eta) {
    // A non-finite eta (e.g. an "unset" sentinel that was never resolved by the
    // caller) would turn every ratio below into inf/NaN and destroy the latent.
    if (!std::isfinite(eta)) {
        eta = 1.0f;
    }

    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        float sigma       = sigmas[i];
        float sigma_to    = sigmas[i + 1];
        auto denoised_opt = model(x, sigma, i + 1);
        if (denoised_opt.empty()) {
            return {};
        }
        sd::Tensor<float> denoised = std::move(denoised_opt);
        if (sigma_to == 0) {
            // x = x × (sigma_to / sigma) + denoised × (1 - (sigma_to / sigma))  // below
            //   = x × (       0 / sigma) + denoised × (1 - (0 / sigma))
            //   = denoised
            x = denoised;
        } else if (eta == 0) {
            // x = x + d × (sigma_to - sigma)
            //   = x + ((x - denoised) / sigma) × (sigma_to - sigma)
            //   = x + (x - denoised) × (sigma_to / sigma - 1)
            //   = x + x × (sigma_to / sigma) - x - denoised × (sigma_to / sigma) + denoised
            //   = x × (sigma_to / sigma) + denoised × (1 - (sigma_to / sigma))
            float sigma_ratio = sigma_to / sigma;
            x                 = sigma_ratio * x + (1.0f - sigma_ratio) * denoised;
        } else {
            float downstep_ratio = 1.0f + (sigma_to / sigma - 1.0f) * eta;
            float sigma_down     = sigma_to * downstep_ratio;
            float sigma_ratio    = sigma_down / sigma;
            x                    = sigma_ratio * x + (1.0f - sigma_ratio) * denoised;

            float alpha_scale = (1.0f - sigma_to) / (1.0f - sigma_down);

            // sigma_up = √(sigma_to² - sigma_down² × alpha_scale²)
            //          = √(sigma_to² - sigma_to² × downstep_ratio² × alpha_scale²)
            //          = sigma_to × √(1 - downstep_ratio² × alpha_scale²)
            float term     = downstep_ratio * alpha_scale;
            float radicand = (1.0f + term) * (1.0f - term);
            // Rounding (or an eta outside [0, 1]) can push |term| past 1, making
            // the radicand negative; sqrt would then return NaN. Inject no noise
            // in that case instead of corrupting x.
            float sigma_up = radicand > 0.0f ? sigma_to * std::sqrt(radicand) : 0.0f;
            x              = alpha_scale * x + sd::Tensor<float>::randn_like(x, rng) * sigma_up;
        }
    }
    return x;
}

static sd::Tensor<float> sample_euler(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas) {
Expand Down Expand Up @@ -885,7 +932,8 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng) {
std::shared_ptr<RNG> rng,
float eta) {
auto t_fn = [](float sigma) -> float { return -log(sigma); };
auto sigma_fn = [](float t) -> float { return exp(-t); };

Expand All @@ -896,7 +944,7 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
return {};
}
sd::Tensor<float> denoised = std::move(denoised_opt);
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1]);
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);

if (sigma_down == 0) {
x = denoised;
Expand Down Expand Up @@ -1368,18 +1416,22 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
sd::Tensor<float> x,
std::vector<float> sigmas,
std::shared_ptr<RNG> rng,
float eta) {
float eta,
bool is_flow_denoiser) {
switch (method) {
case EULER_A_SAMPLE_METHOD:
return sample_euler_ancestral(model, std::move(x), sigmas, rng);
if (is_flow_denoiser)
return sample_euler_flow(model, std::move(x), sigmas, rng, eta);
else
return sample_euler_ancestral(model, std::move(x), sigmas, rng, eta);
case EULER_SAMPLE_METHOD:
return sample_euler(model, std::move(x), sigmas);
case HEUN_SAMPLE_METHOD:
return sample_heun(model, std::move(x), sigmas);
case DPM2_SAMPLE_METHOD:
return sample_dpm2(model, std::move(x), sigmas);
case DPMPP2S_A_SAMPLE_METHOD:
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng);
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta);
case DPMPP2M_SAMPLE_METHOD:
return sample_dpmpp_2m(model, std::move(x), sigmas);
case DPMPP2Mv2_SAMPLE_METHOD:
Expand Down
38 changes: 36 additions & 2 deletions src/stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1593,6 +1593,7 @@ class StableDiffusionGGML {
float eta,
int shifted_timestep,
sample_method_t method,
bool is_flow_denoiser,
const std::vector<float>& sigmas,
int start_merge_step,
const std::vector<sd::Tensor<float>>& ref_latents,
Expand Down Expand Up @@ -1791,7 +1792,7 @@ class StableDiffusionGGML {
return denoised;
};

auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta);
auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser);
if (x0_opt.empty()) {
LOG_ERROR("Diffusion model sampling failed");
if (control_net) {
Expand Down Expand Up @@ -1909,6 +1910,12 @@ class StableDiffusionGGML {
flow_denoiser->set_shift(flow_shift);
}
}

// Reports whether `denoiser` can be dynamically cast to DiscreteFlowDenoiser.
bool is_flow_denoiser() {
    return std::dynamic_pointer_cast<DiscreteFlowDenoiser>(denoiser) != nullptr;
}

};

/*================================================= SD API ==================================================*/
Expand Down Expand Up @@ -2225,6 +2232,7 @@ void sd_sample_params_init(sd_sample_params_t* sample_params) {
sample_params->scheduler = SCHEDULER_COUNT;
sample_params->sample_method = SAMPLE_METHOD_COUNT;
sample_params->sample_steps = 20;
sample_params->eta = INFINITY;
sample_params->custom_sigmas = nullptr;
sample_params->custom_sigmas_count = 0;
sample_params->flow_shift = INFINITY;
Expand Down Expand Up @@ -2438,6 +2446,25 @@ static scheduler_t resolve_scheduler(sd_ctx_t* sd_ctx,
return scheduler;
}

/**
 * Resolves the effective eta (noise multiplier) for a sampling method.
 *
 * An eta equal to INFINITY means "not set": it is replaced by the per-method
 * default (1 for the ancestral samplers euler_a and dpm++2s_a, 0 otherwise).
 * Any other value is returned unchanged.
 *
 * @param sd_ctx         Not used by this function.
 * @param eta            Requested eta, or INFINITY when unset.
 * @param sample_method  Sampling method the eta will be used with.
 * @return A concrete eta; never the INFINITY sentinel.
 */
static float resolve_eta(sd_ctx_t* sd_ctx,
                         float eta,
                         enum sample_method_t sample_method) {
    (void)sd_ctx;

    if (eta != INFINITY) {
        return eta;
    }

    switch (sample_method) {
        case EULER_A_SAMPLE_METHOD:
        case DPMPP2S_A_SAMPLE_METHOD:
            return 1.0f;
        case DDIM_TRAILING_SAMPLE_METHOD:
        case TCD_SAMPLE_METHOD:
        case RES_MULTISTEP_SAMPLE_METHOD:
        case RES_2S_SAMPLE_METHOD:
        default:
            // Methods without a listed default previously leaked the INFINITY
            // sentinel to the caller; resolve them to a finite 0 instead.
            return 0.0f;
    }
}

struct GenerationRequest {
std::string prompt;
std::string negative_prompt;
Expand Down Expand Up @@ -2586,6 +2613,7 @@ struct GenerationRequest {
struct SamplePlan {
enum sample_method_t sample_method = SAMPLE_METHOD_COUNT;
enum sample_method_t high_noise_sample_method = SAMPLE_METHOD_COUNT;
float eta = 0.f;
int sample_steps = 0;
int high_noise_sample_steps = 0;
int total_steps = 0;
Expand All @@ -2597,6 +2625,7 @@ struct SamplePlan {
const sd_img_gen_params_t* sd_img_gen_params,
const GenerationRequest& request) {
sample_method = sd_img_gen_params->sample_params.sample_method;
eta = sd_img_gen_params->sample_params.eta;
sample_steps = sd_img_gen_params->sample_params.sample_steps;
resolve(sd_ctx, &request, &sd_img_gen_params->sample_params);
}
Expand Down Expand Up @@ -2644,6 +2673,8 @@ struct SamplePlan {
sd_ctx->sd->version);
}

eta = resolve_eta(sd_ctx, eta, sample_method);

if (high_noise_sample_steps < 0) {
for (size_t i = 0; i < sigmas.size(); ++i) {
if (sigmas[i] < moe_boundary) {
Expand Down Expand Up @@ -3123,9 +3154,10 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
latents.control_image,
request.control_strength,
request.guidance,
request.eta,
plan.eta,
request.shifted_timestep,
plan.sample_method,
sd_ctx->sd->is_flow_denoiser(),
plan.sigmas,
plan.start_merge_step,
latents.ref_latents,
Expand Down Expand Up @@ -3485,6 +3517,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
sd_vid_gen_params->high_noise_sample_params.eta,
request.shifted_timestep,
plan.high_noise_sample_method,
sd_ctx->sd->is_flow_denoiser(),
high_noise_sigmas,
-1,
std::vector<sd::Tensor<float>>{},
Expand Down Expand Up @@ -3526,6 +3559,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
sd_vid_gen_params->sample_params.eta,
sd_vid_gen_params->sample_params.shifted_timestep,
plan.sample_method,
sd_ctx->sd->is_flow_denoiser(),
plan.sigmas,
-1,
std::vector<sd::Tensor<float>>{},
Expand Down
Loading