From e31a035262488993d981f09b372e7cac266e84a3 Mon Sep 17 00:00:00 2001
From: xinyu
Date: Thu, 19 Feb 2026 23:14:40 +0800
Subject: [PATCH] hard limited n ctx to 2048

---
 slm_server/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/slm_server/config.py b/slm_server/config.py
index 7e90577..7948da3 100644
--- a/slm_server/config.py
+++ b/slm_server/config.py
@@ -62,7 +62,7 @@ class Settings(BaseSettings):
         description="Owner label for /models list. Set SLM_MODEL_OWNER to override.",
     )
     n_ctx: int = Field(
-        8192, description="Maximum context window (input + generated tokens)."
+        2048, description="Maximum context window (input + generated tokens)."
     )
     n_threads: int = Field(
         2, description="Number of OpenMP threads llama‑cpp will spawn."