
Commit 4164596

llama-fit-params: QoL impr. for prints/errors (#18089)
1 parent ef83fb8 commit 4164596

File tree: 1 file changed, 20 additions, 16 deletions

tools/fit-params/fit-params.cpp

Lines changed: 20 additions & 16 deletions
@@ -4,7 +4,11 @@
 #include "common.h"
 #include "log.h"
 
-#include <iostream>
+#include <chrono>
+#include <cinttypes>
+#include <thread>
+
+using namespace std::chrono_literals;
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -22,13 +26,17 @@ int main(int argc, char ** argv) {
     llama_numa_init(params.numa);
     auto mparams = common_model_params_to_llama(params);
     auto cparams = common_context_params_to_llama(params);
-    llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
+    const bool success = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
         params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
         params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
+    if (!success) {
+        LOG_ERR("%s: failed to fit CLI arguments to free memory, exiting...\n", __func__);
+        exit(1);
+    }
 
-    LOG_INF("Printing fitted CLI arguments to stdout...\n");
-    std::cout << "-c " << cparams.n_ctx;
-    std::cout << " -ngl " << mparams.n_gpu_layers;
+    LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
+    std::this_thread::sleep_for(10ms); // to avoid a race between stderr and stdout
+    printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
 
     size_t nd = llama_max_devices();
     while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
@@ -37,26 +45,22 @@ int main(int argc, char ** argv) {
     if (nd > 1) {
         for (size_t id = 0; id < nd; id++) {
             if (id == 0) {
-                std::cout << " -ts ";
+                printf(" -ts ");
             }
-            if (id > 0) {
-                std::cout << ",";
-            }
-            std::cout << mparams.tensor_split[id];
+            printf("%s%" PRIu32, id > 0 ? "," : "", uint32_t(mparams.tensor_split[id]));
         }
     }
 
     const size_t ntbo = llama_max_tensor_buft_overrides();
+    bool any_tbo = false;
     for (size_t itbo = 0; itbo < ntbo && mparams.tensor_buft_overrides[itbo].pattern != nullptr; itbo++) {
         if (itbo == 0) {
-            std::cout << " -ot ";
-        }
-        if (itbo > 0) {
-            std::cout << ",";
+            printf(" -ot \"");
         }
-        std::cout << mparams.tensor_buft_overrides[itbo].pattern << "=" << ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft);
+        printf("%s%s=%s", itbo > 0 ? "," : "", mparams.tensor_buft_overrides[itbo].pattern, ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft));
+        any_tbo = true;
     }
-    std::cout << "\n";
+    printf("%s\n", any_tbo ? "\"" : "");
 
     return 0;
 }
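
For illustration, a minimal standalone sketch (not part of the commit) of the output pattern the new code follows: log messages go to stderr, the machine-readable argument string goes to stdout via printf, a short sleep keeps the two streams from interleaving in a terminal, and the closing quote for -ot is emitted only if at least one override was actually printed. The fitted values and the override pattern below are hypothetical stand-ins for what llama_params_fit would compute.

#include <chrono>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <thread>

using namespace std::chrono_literals;

int main() {
    // Hypothetical fitted values; the real tool derives them via llama_params_fit.
    const uint32_t n_ctx        = 8192;
    const uint32_t n_gpu_layers = 33;
    const char   * overrides[]  = { "blk\\.[0-9]+\\.ffn_.*=CPU" }; // hypothetical -ot override

    fprintf(stderr, "printing fitted CLI arguments to stdout...\n"); // diagnostics on stderr
    std::this_thread::sleep_for(10ms); // avoid a race between stderr and stdout

    printf("-c %" PRIu32 " -ngl %" PRIu32, n_ctx, n_gpu_layers);

    bool any_tbo = false;
    for (size_t itbo = 0; itbo < sizeof(overrides)/sizeof(overrides[0]); itbo++) {
        printf("%s%s", itbo == 0 ? " -ot \"" : ",", overrides[itbo]);
        any_tbo = true;
    }
    printf("%s\n", any_tbo ? "\"" : ""); // close the quote only if -ot was printed

    // Expected stdout: -c 8192 -ngl 33 -ot "blk\.[0-9]+\.ffn_.*=CPU"
    return 0;
}

Quoting the -ot value matters because buffer-type override patterns are regexes containing characters such as . and *; without the surrounding double quotes, pasting the printed line back into a shell command could subject the pattern to globbing or word splitting.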
