@@ -449,10 +449,10 @@ class StableDiffusionGGML {
449449 tensor_storage_map,
450450 version);
451451 diffusion_model = std::make_shared<FluxModel>(backend,
452- offload_params_to_cpu,
453- tensor_storage_map,
454- version,
455- sd_ctx_params->chroma_use_dit_mask );
452+ offload_params_to_cpu,
453+ tensor_storage_map,
454+ version,
455+ sd_ctx_params->chroma_use_dit_mask );
456456 } else if (sd_version_is_wan (version)) {
457457 cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
458458 offload_params_to_cpu,
@@ -493,20 +493,20 @@ class StableDiffusionGGML {
493493 " " ,
494494 enable_vision);
495495 diffusion_model = std::make_shared<QwenImageModel>(backend,
496- offload_params_to_cpu,
497- tensor_storage_map,
498- " model.diffusion_model" ,
499- version);
496+ offload_params_to_cpu,
497+ tensor_storage_map,
498+ " model.diffusion_model" ,
499+ version);
500500 } else if (sd_version_is_z_image (version)) {
501501 cond_stage_model = std::make_shared<LLMEmbedder>(clip_backend,
502502 offload_params_to_cpu,
503503 tensor_storage_map,
504504 version);
505505 diffusion_model = std::make_shared<ZImageModel>(backend,
506- offload_params_to_cpu,
507- tensor_storage_map,
508- " model.diffusion_model" ,
509- version);
506+ offload_params_to_cpu,
507+ tensor_storage_map,
508+ " model.diffusion_model" ,
509+ version);
510510 } else { // SD1.x SD2.x SDXL
511511 std::map<std::string, std::string> embbeding_map;
512512 for (int i = 0 ; i < sd_ctx_params->embedding_count ; i++) {
@@ -1331,9 +1331,9 @@ class StableDiffusionGGML {
13311331 uint32_t dim = latents->ne [ggml_n_dims (latents) - 1 ];
13321332
13331333 if (preview_mode == PREVIEW_PROJ) {
1334- int64_t patch_sz = 1 ;
1335- const float (*latent_rgb_proj)[channel] = nullptr ;
1336- float * latent_rgb_bias = nullptr ;
1334+ int64_t patch_sz = 1 ;
1335+ const float (*latent_rgb_proj)[channel] = nullptr ;
1336+ float * latent_rgb_bias = nullptr ;
13371337
13381338 if (dim == 128 ) {
13391339 if (sd_version_is_flux2 (version)) {
@@ -1984,12 +1984,12 @@ class StableDiffusionGGML {
19841984 -0 .0313f , -0 .1649f , 0 .0117f , 0 .0723f , -0 .2839f , -0 .2083f , -0 .0520f , 0 .3748f ,
19851985 0 .0152f , 0 .1957f , 0 .1433f , -0 .2944f , 0 .3573f , -0 .0548f , -0 .1681f , -0 .0667f };
19861986 latents_std_vec = {
1987- 0 .4765f , 1 .0364f , 0 .4514f , 1 .1677f , 0 .5313f , 0 .4990f , 0 .4818f , 0 .5013f ,
1988- 0 .8158f , 1 .0344f , 0 .5894f , 1 .0901f , 0 .6885f , 0 .6165f , 0 .8454f , 0 .4978f ,
1989- 0 .5759f , 0 .3523f , 0 .7135f , 0 .6804f , 0 .5833f , 1 .4146f , 0 .8986f , 0 .5659f ,
1990- 0 .7069f , 0 .5338f , 0 .4889f , 0 .4917f , 0 .4069f , 0 .4999f , 0 .6866f , 0 .4093f ,
1991- 0 .5709f , 0 .6065f , 0 .6415f , 0 .4944f , 0 .5726f , 1 .2042f , 0 .5458f , 1 .6887f ,
1992- 0 .3971f , 1 .0600f , 0 .3943f , 0 .5537f , 0 .5444f , 0 .4089f , 0 .7468f , 0 .7744f };
1987+ 0 .4765f , 1 .0364f , 0 .4514f , 1 .1677f , 0 .5313f , 0 .4990f , 0 .4818f , 0 .5013f ,
1988+ 0 .8158f , 1 .0344f , 0 .5894f , 1 .0901f , 0 .6885f , 0 .6165f , 0 .8454f , 0 .4978f ,
1989+ 0 .5759f , 0 .3523f , 0 .7135f , 0 .6804f , 0 .5833f , 1 .4146f , 0 .8986f , 0 .5659f ,
1990+ 0 .7069f , 0 .5338f , 0 .4889f , 0 .4917f , 0 .4069f , 0 .4999f , 0 .6866f , 0 .4093f ,
1991+ 0 .5709f , 0 .6065f , 0 .6415f , 0 .4944f , 0 .5726f , 1 .2042f , 0 .5458f , 1 .6887f ,
1992+ 0 .3971f , 1 .0600f , 0 .3943f , 0 .5537f , 0 .5444f , 0 .4089f , 0 .7468f , 0 .7744f };
19931993 } else if (latent->ne [channel_dim] == 128 ) {
19941994 // flux2
19951995 latents_mean_vec = {-0 .0676f , -0 .0715f , -0 .0753f , -0 .0745f , 0 .0223f , 0 .0180f , 0 .0142f , 0 .0184f ,
@@ -2009,22 +2009,22 @@ class StableDiffusionGGML {
20092009 -0 .0511f , -0 .0603f , -0 .0478f , -0 .0524f , -0 .0227f , -0 .0274f , -0 .0154f , -0 .0255f ,
20102010 -0 .0572f , -0 .0565f , -0 .0518f , -0 .0496f , 0 .0116f , 0 .0054f , 0 .0163f , 0 .0104f };
20112011 latents_std_vec = {
2012- 1 .8029f , 1 .7786f , 1 .7868f , 1 .7837f , 1 .7717f , 1 .7590f , 1 .7610f , 1 .7479f ,
2013- 1 .7336f , 1 .7373f , 1 .7340f , 1 .7343f , 1 .8626f , 1 .8527f , 1 .8629f , 1 .8589f ,
2014- 1 .7593f , 1 .7526f , 1 .7556f , 1 .7583f , 1 .7363f , 1 .7400f , 1 .7355f , 1 .7394f ,
2015- 1 .7342f , 1 .7246f , 1 .7392f , 1 .7304f , 1 .7551f , 1 .7513f , 1 .7559f , 1 .7488f ,
2016- 1 .8449f , 1 .8454f , 1 .8550f , 1 .8535f , 1 .8240f , 1 .7813f , 1 .7854f , 1 .7945f ,
2017- 1 .8047f , 1 .7876f , 1 .7695f , 1 .7676f , 1 .7782f , 1 .7667f , 1 .7925f , 1 .7848f ,
2018- 1 .7579f , 1 .7407f , 1 .7483f , 1 .7368f , 1 .7961f , 1 .7998f , 1 .7920f , 1 .7925f ,
2019- 1 .7780f , 1 .7747f , 1 .7727f , 1 .7749f , 1 .7526f , 1 .7447f , 1 .7657f , 1 .7495f ,
2020- 1 .7775f , 1 .7720f , 1 .7813f , 1 .7813f , 1 .8162f , 1 .8013f , 1 .8023f , 1 .8033f ,
2021- 1 .7527f , 1 .7331f , 1 .7563f , 1 .7482f , 1 .7610f , 1 .7507f , 1 .7681f , 1 .7613f ,
2022- 1 .7665f , 1 .7545f , 1 .7828f , 1 .7726f , 1 .7896f , 1 .7999f , 1 .7864f , 1 .7760f ,
2023- 1 .7613f , 1 .7625f , 1 .7560f , 1 .7577f , 1 .7783f , 1 .7671f , 1 .7810f , 1 .7799f ,
2024- 1 .7201f , 1 .7068f , 1 .7265f , 1 .7091f , 1 .7793f , 1 .7578f , 1 .7502f , 1 .7455f ,
2025- 1 .7587f , 1 .7500f , 1 .7525f , 1 .7362f , 1 .7616f , 1 .7572f , 1 .7444f , 1 .7430f ,
2026- 1 .7509f , 1 .7610f , 1 .7634f , 1 .7612f , 1 .7254f , 1 .7135f , 1 .7321f , 1 .7226f ,
2027- 1 .7664f , 1 .7624f , 1 .7718f , 1 .7664f , 1 .7457f , 1 .7441f , 1 .7569f , 1 .7530f };
2012+ 1 .8029f , 1 .7786f , 1 .7868f , 1 .7837f , 1 .7717f , 1 .7590f , 1 .7610f , 1 .7479f ,
2013+ 1 .7336f , 1 .7373f , 1 .7340f , 1 .7343f , 1 .8626f , 1 .8527f , 1 .8629f , 1 .8589f ,
2014+ 1 .7593f , 1 .7526f , 1 .7556f , 1 .7583f , 1 .7363f , 1 .7400f , 1 .7355f , 1 .7394f ,
2015+ 1 .7342f , 1 .7246f , 1 .7392f , 1 .7304f , 1 .7551f , 1 .7513f , 1 .7559f , 1 .7488f ,
2016+ 1 .8449f , 1 .8454f , 1 .8550f , 1 .8535f , 1 .8240f , 1 .7813f , 1 .7854f , 1 .7945f ,
2017+ 1 .8047f , 1 .7876f , 1 .7695f , 1 .7676f , 1 .7782f , 1 .7667f , 1 .7925f , 1 .7848f ,
2018+ 1 .7579f , 1 .7407f , 1 .7483f , 1 .7368f , 1 .7961f , 1 .7998f , 1 .7920f , 1 .7925f ,
2019+ 1 .7780f , 1 .7747f , 1 .7727f , 1 .7749f , 1 .7526f , 1 .7447f , 1 .7657f , 1 .7495f ,
2020+ 1 .7775f , 1 .7720f , 1 .7813f , 1 .7813f , 1 .8162f , 1 .8013f , 1 .8023f , 1 .8033f ,
2021+ 1 .7527f , 1 .7331f , 1 .7563f , 1 .7482f , 1 .7610f , 1 .7507f , 1 .7681f , 1 .7613f ,
2022+ 1 .7665f , 1 .7545f , 1 .7828f , 1 .7726f , 1 .7896f , 1 .7999f , 1 .7864f , 1 .7760f ,
2023+ 1 .7613f , 1 .7625f , 1 .7560f , 1 .7577f , 1 .7783f , 1 .7671f , 1 .7810f , 1 .7799f ,
2024+ 1 .7201f , 1 .7068f , 1 .7265f , 1 .7091f , 1 .7793f , 1 .7578f , 1 .7502f , 1 .7455f ,
2025+ 1 .7587f , 1 .7500f , 1 .7525f , 1 .7362f , 1 .7616f , 1 .7572f , 1 .7444f , 1 .7430f ,
2026+ 1 .7509f , 1 .7610f , 1 .7634f , 1 .7612f , 1 .7254f , 1 .7135f , 1 .7321f , 1 .7226f ,
2027+ 1 .7664f , 1 .7624f , 1 .7718f , 1 .7664f , 1 .7457f , 1 .7441f , 1 .7569f , 1 .7530f };
20282028 }
20292029 }
20302030
@@ -3633,7 +3633,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
36333633 denoise_mask = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, init_latent->ne [0 ], init_latent->ne [1 ], init_latent->ne [2 ], 1 );
36343634 ggml_set_f32 (denoise_mask, 1 .f );
36353635
3636- sd_ctx->sd ->process_latent_out (init_latent);
3636+ if (!sd_ctx->sd ->use_tiny_autoencoder )
3637+ sd_ctx->sd ->process_latent_out (init_latent);
36373638
36383639 ggml_ext_tensor_iter (init_image_latent, [&](ggml_tensor* t, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
36393640 float value = ggml_ext_tensor_get_f32 (t, i0, i1, i2, i3);
@@ -3643,7 +3644,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
36433644 }
36443645 });
36453646
3646- sd_ctx->sd ->process_latent_in (init_latent);
3647+ if (!sd_ctx->sd ->use_tiny_autoencoder )
3648+ sd_ctx->sd ->process_latent_in (init_latent);
36473649
36483650 int64_t t2 = ggml_time_ms ();
36493651 LOG_INFO (" encode_first_stage completed, taking %" PRId64 " ms" , t2 - t1);
0 commit comments