Skip to content

Commit 23fb870

Browse files
committed
fix img2vid
1 parent a7a791d commit 23fb870

File tree

1 file changed

+41
-39
lines changed

1 file changed

+41
-39
lines changed

stable-diffusion.cpp

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -449,10 +449,10 @@ class StableDiffusionGGML {
449449
tensor_storage_map,
450450
version);
451451
diffusion_model = std::make_shared<FluxModel>(backend,
452-
offload_params_to_cpu,
453-
tensor_storage_map,
454-
version,
455-
sd_ctx_params->chroma_use_dit_mask);
452+
offload_params_to_cpu,
453+
tensor_storage_map,
454+
version,
455+
sd_ctx_params->chroma_use_dit_mask);
456456
} else if (sd_version_is_wan(version)) {
457457
cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
458458
offload_params_to_cpu,
@@ -493,20 +493,20 @@ class StableDiffusionGGML {
493493
"",
494494
enable_vision);
495495
diffusion_model = std::make_shared<QwenImageModel>(backend,
496-
offload_params_to_cpu,
497-
tensor_storage_map,
498-
"model.diffusion_model",
499-
version);
496+
offload_params_to_cpu,
497+
tensor_storage_map,
498+
"model.diffusion_model",
499+
version);
500500
} else if (sd_version_is_z_image(version)) {
501501
cond_stage_model = std::make_shared<LLMEmbedder>(clip_backend,
502502
offload_params_to_cpu,
503503
tensor_storage_map,
504504
version);
505505
diffusion_model = std::make_shared<ZImageModel>(backend,
506-
offload_params_to_cpu,
507-
tensor_storage_map,
508-
"model.diffusion_model",
509-
version);
506+
offload_params_to_cpu,
507+
tensor_storage_map,
508+
"model.diffusion_model",
509+
version);
510510
} else { // SD1.x SD2.x SDXL
511511
std::map<std::string, std::string> embbeding_map;
512512
for (int i = 0; i < sd_ctx_params->embedding_count; i++) {
@@ -1331,9 +1331,9 @@ class StableDiffusionGGML {
13311331
uint32_t dim = latents->ne[ggml_n_dims(latents) - 1];
13321332

13331333
if (preview_mode == PREVIEW_PROJ) {
1334-
int64_t patch_sz = 1;
1335-
const float(*latent_rgb_proj)[channel] = nullptr;
1336-
float* latent_rgb_bias = nullptr;
1334+
int64_t patch_sz = 1;
1335+
const float (*latent_rgb_proj)[channel] = nullptr;
1336+
float* latent_rgb_bias = nullptr;
13371337

13381338
if (dim == 128) {
13391339
if (sd_version_is_flux2(version)) {
@@ -1984,12 +1984,12 @@ class StableDiffusionGGML {
19841984
-0.0313f, -0.1649f, 0.0117f, 0.0723f, -0.2839f, -0.2083f, -0.0520f, 0.3748f,
19851985
0.0152f, 0.1957f, 0.1433f, -0.2944f, 0.3573f, -0.0548f, -0.1681f, -0.0667f};
19861986
latents_std_vec = {
1987-
0.4765f, 1.0364f, 0.4514f, 1.1677f, 0.5313f, 0.4990f, 0.4818f, 0.5013f,
1988-
0.8158f, 1.0344f, 0.5894f, 1.0901f, 0.6885f, 0.6165f, 0.8454f, 0.4978f,
1989-
0.5759f, 0.3523f, 0.7135f, 0.6804f, 0.5833f, 1.4146f, 0.8986f, 0.5659f,
1990-
0.7069f, 0.5338f, 0.4889f, 0.4917f, 0.4069f, 0.4999f, 0.6866f, 0.4093f,
1991-
0.5709f, 0.6065f, 0.6415f, 0.4944f, 0.5726f, 1.2042f, 0.5458f, 1.6887f,
1992-
0.3971f, 1.0600f, 0.3943f, 0.5537f, 0.5444f, 0.4089f, 0.7468f, 0.7744f};
1987+
0.4765f, 1.0364f, 0.4514f, 1.1677f, 0.5313f, 0.4990f, 0.4818f, 0.5013f,
1988+
0.8158f, 1.0344f, 0.5894f, 1.0901f, 0.6885f, 0.6165f, 0.8454f, 0.4978f,
1989+
0.5759f, 0.3523f, 0.7135f, 0.6804f, 0.5833f, 1.4146f, 0.8986f, 0.5659f,
1990+
0.7069f, 0.5338f, 0.4889f, 0.4917f, 0.4069f, 0.4999f, 0.6866f, 0.4093f,
1991+
0.5709f, 0.6065f, 0.6415f, 0.4944f, 0.5726f, 1.2042f, 0.5458f, 1.6887f,
1992+
0.3971f, 1.0600f, 0.3943f, 0.5537f, 0.5444f, 0.4089f, 0.7468f, 0.7744f};
19931993
} else if (latent->ne[channel_dim] == 128) {
19941994
// flux2
19951995
latents_mean_vec = {-0.0676f, -0.0715f, -0.0753f, -0.0745f, 0.0223f, 0.0180f, 0.0142f, 0.0184f,
@@ -2009,22 +2009,22 @@ class StableDiffusionGGML {
20092009
-0.0511f, -0.0603f, -0.0478f, -0.0524f, -0.0227f, -0.0274f, -0.0154f, -0.0255f,
20102010
-0.0572f, -0.0565f, -0.0518f, -0.0496f, 0.0116f, 0.0054f, 0.0163f, 0.0104f};
20112011
latents_std_vec = {
2012-
1.8029f, 1.7786f, 1.7868f, 1.7837f, 1.7717f, 1.7590f, 1.7610f, 1.7479f,
2013-
1.7336f, 1.7373f, 1.7340f, 1.7343f, 1.8626f, 1.8527f, 1.8629f, 1.8589f,
2014-
1.7593f, 1.7526f, 1.7556f, 1.7583f, 1.7363f, 1.7400f, 1.7355f, 1.7394f,
2015-
1.7342f, 1.7246f, 1.7392f, 1.7304f, 1.7551f, 1.7513f, 1.7559f, 1.7488f,
2016-
1.8449f, 1.8454f, 1.8550f, 1.8535f, 1.8240f, 1.7813f, 1.7854f, 1.7945f,
2017-
1.8047f, 1.7876f, 1.7695f, 1.7676f, 1.7782f, 1.7667f, 1.7925f, 1.7848f,
2018-
1.7579f, 1.7407f, 1.7483f, 1.7368f, 1.7961f, 1.7998f, 1.7920f, 1.7925f,
2019-
1.7780f, 1.7747f, 1.7727f, 1.7749f, 1.7526f, 1.7447f, 1.7657f, 1.7495f,
2020-
1.7775f, 1.7720f, 1.7813f, 1.7813f, 1.8162f, 1.8013f, 1.8023f, 1.8033f,
2021-
1.7527f, 1.7331f, 1.7563f, 1.7482f, 1.7610f, 1.7507f, 1.7681f, 1.7613f,
2022-
1.7665f, 1.7545f, 1.7828f, 1.7726f, 1.7896f, 1.7999f, 1.7864f, 1.7760f,
2023-
1.7613f, 1.7625f, 1.7560f, 1.7577f, 1.7783f, 1.7671f, 1.7810f, 1.7799f,
2024-
1.7201f, 1.7068f, 1.7265f, 1.7091f, 1.7793f, 1.7578f, 1.7502f, 1.7455f,
2025-
1.7587f, 1.7500f, 1.7525f, 1.7362f, 1.7616f, 1.7572f, 1.7444f, 1.7430f,
2026-
1.7509f, 1.7610f, 1.7634f, 1.7612f, 1.7254f, 1.7135f, 1.7321f, 1.7226f,
2027-
1.7664f, 1.7624f, 1.7718f, 1.7664f, 1.7457f, 1.7441f, 1.7569f, 1.7530f};
2012+
1.8029f, 1.7786f, 1.7868f, 1.7837f, 1.7717f, 1.7590f, 1.7610f, 1.7479f,
2013+
1.7336f, 1.7373f, 1.7340f, 1.7343f, 1.8626f, 1.8527f, 1.8629f, 1.8589f,
2014+
1.7593f, 1.7526f, 1.7556f, 1.7583f, 1.7363f, 1.7400f, 1.7355f, 1.7394f,
2015+
1.7342f, 1.7246f, 1.7392f, 1.7304f, 1.7551f, 1.7513f, 1.7559f, 1.7488f,
2016+
1.8449f, 1.8454f, 1.8550f, 1.8535f, 1.8240f, 1.7813f, 1.7854f, 1.7945f,
2017+
1.8047f, 1.7876f, 1.7695f, 1.7676f, 1.7782f, 1.7667f, 1.7925f, 1.7848f,
2018+
1.7579f, 1.7407f, 1.7483f, 1.7368f, 1.7961f, 1.7998f, 1.7920f, 1.7925f,
2019+
1.7780f, 1.7747f, 1.7727f, 1.7749f, 1.7526f, 1.7447f, 1.7657f, 1.7495f,
2020+
1.7775f, 1.7720f, 1.7813f, 1.7813f, 1.8162f, 1.8013f, 1.8023f, 1.8033f,
2021+
1.7527f, 1.7331f, 1.7563f, 1.7482f, 1.7610f, 1.7507f, 1.7681f, 1.7613f,
2022+
1.7665f, 1.7545f, 1.7828f, 1.7726f, 1.7896f, 1.7999f, 1.7864f, 1.7760f,
2023+
1.7613f, 1.7625f, 1.7560f, 1.7577f, 1.7783f, 1.7671f, 1.7810f, 1.7799f,
2024+
1.7201f, 1.7068f, 1.7265f, 1.7091f, 1.7793f, 1.7578f, 1.7502f, 1.7455f,
2025+
1.7587f, 1.7500f, 1.7525f, 1.7362f, 1.7616f, 1.7572f, 1.7444f, 1.7430f,
2026+
1.7509f, 1.7610f, 1.7634f, 1.7612f, 1.7254f, 1.7135f, 1.7321f, 1.7226f,
2027+
1.7664f, 1.7624f, 1.7718f, 1.7664f, 1.7457f, 1.7441f, 1.7569f, 1.7530f};
20282028
}
20292029
}
20302030

@@ -3633,7 +3633,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
36333633
denoise_mask = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, init_latent->ne[0], init_latent->ne[1], init_latent->ne[2], 1);
36343634
ggml_set_f32(denoise_mask, 1.f);
36353635

3636-
sd_ctx->sd->process_latent_out(init_latent);
3636+
if (!sd_ctx->sd->use_tiny_autoencoder)
3637+
sd_ctx->sd->process_latent_out(init_latent);
36373638

36383639
ggml_ext_tensor_iter(init_image_latent, [&](ggml_tensor* t, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
36393640
float value = ggml_ext_tensor_get_f32(t, i0, i1, i2, i3);
@@ -3643,7 +3644,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
36433644
}
36443645
});
36453646

3646-
sd_ctx->sd->process_latent_in(init_latent);
3647+
if (!sd_ctx->sd->use_tiny_autoencoder)
3648+
sd_ctx->sd->process_latent_in(init_latent);
36473649

36483650
int64_t t2 = ggml_time_ms();
36493651
LOG_INFO("encode_first_stage completed, taking %" PRId64 " ms", t2 - t1);

0 commit comments

Comments
 (0)