44#include " common.h"
55#include " log.h"
66
7- #include < iostream>
7+ #include < chrono>
8+ #include < cinttypes>
9+ #include < thread>
10+
11+ using namespace std ::chrono_literals;
812
913#if defined(_MSC_VER)
1014#pragma warning(disable: 4244 4267) // possible loss of data
@@ -22,13 +26,17 @@ int main(int argc, char ** argv) {
2226 llama_numa_init (params.numa );
2327 auto mparams = common_model_params_to_llama (params);
2428 auto cparams = common_context_params_to_llama (params);
25- llama_params_fit (params.model .path .c_str (), &mparams, &cparams,
29+ const bool success = llama_params_fit (params.model .path .c_str (), &mparams, &cparams,
2630 params.tensor_split , params.tensor_buft_overrides .data (), params.fit_params_target , params.fit_params_min_ctx ,
2731 params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
32+ if (!success) {
33+ LOG_ERR (" %s: failed to fit CLI arguments to free memory, exiting...\n " , __func__);
34+ exit (1 );
35+ }
2836
29- LOG_INF (" Printing fitted CLI arguments to stdout...\n " );
30- std::cout << " -c " << cparams. n_ctx ;
31- std::cout << " -ngl " << mparams.n_gpu_layers ;
37+ LOG_INF (" %s: printing fitted CLI arguments to stdout...\n " , __func__ );
38+ std::this_thread::sleep_for (10ms); // to avoid a race between stderr and stdout
39+ printf ( " -c % " PRIu32 " -ngl % " PRIu32, cparams. n_ctx , mparams.n_gpu_layers ) ;
3240
3341 size_t nd = llama_max_devices ();
3442 while (nd > 1 && mparams.tensor_split [nd - 1 ] == 0 .0f ) {
@@ -37,26 +45,22 @@ int main(int argc, char ** argv) {
3745 if (nd > 1 ) {
3846 for (size_t id = 0 ; id < nd; id++) {
3947 if (id == 0 ) {
40- std::cout << " -ts " ;
48+ printf ( " -ts " ) ;
4149 }
42- if (id > 0 ) {
43- std::cout << " ," ;
44- }
45- std::cout << mparams.tensor_split [id];
50+ printf (" %s%" PRIu32, id > 0 ? " ," : " " , uint32_t (mparams.tensor_split [id]));
4651 }
4752 }
4853
4954 const size_t ntbo = llama_max_tensor_buft_overrides ();
55+ bool any_tbo = false ;
5056 for (size_t itbo = 0 ; itbo < ntbo && mparams.tensor_buft_overrides [itbo].pattern != nullptr ; itbo++) {
5157 if (itbo == 0 ) {
52- std::cout << " -ot " ;
53- }
54- if (itbo > 0 ) {
55- std::cout << " ," ;
58+ printf (" -ot \" " );
5659 }
57- std::cout << mparams.tensor_buft_overrides [itbo].pattern << " =" << ggml_backend_buft_name (mparams.tensor_buft_overrides [itbo].buft );
60+ printf (" %s%s=%s" , itbo > 0 ? " ," : " " , mparams.tensor_buft_overrides [itbo].pattern , ggml_backend_buft_name (mparams.tensor_buft_overrides [itbo].buft ));
61+ any_tbo = true ;
5862 }
59- std::cout << " \n " ;
63+ printf ( " %s \n " , any_tbo ? " \" " : " " ) ;
6064
6165 return 0 ;
6266}
0 commit comments