diff --git a/docs/model_server_rest_api_chat.md b/docs/model_server_rest_api_chat.md
index b5d347d200..5a2718fc3f 100644
--- a/docs/model_server_rest_api_chat.md
+++ b/docs/model_server_rest_api_chat.md
@@ -235,11 +235,12 @@ Some parameters, especially related to sampling (like `temperature`, `top_p` etc
 |-------|----------|----------|----------|---------|-----|
 | temperature | ✅ | ✅ | ✅ | float (default: `1.0`) | The value is used to modulate token probabilities for multinomial sampling. It enables multinomial sampling when set to `> 0.0`. |
 | top_p | ✅ | ✅ | ✅ | float (default: `1.0`) | Controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to 1 to consider all tokens. |
-| top_k | ✅ | ❌ | ✅ | int (default: all tokens) | Controls the number of top tokens to consider. Set to empty or -1 to consider all tokens. |
+| min_p | ✅ | ❌ | ✅ | float (default: `0.0`) | Minimum probability threshold relative to the most likely token. Tokens with probability below `min_p` × the top token probability are filtered out. `0.0` (default) disables the filter. Typical values: `0.05`–`0.1`. Must be in `[0.0, 1.0)`. |
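+| top_k | ✅ | ❌ | ✅ | int (default: `40`) | Controls the number of top tokens to consider. Must be `-1` or a positive integer; `0` and other negative values are rejected. When multinomial sampling is active and `top_k` is set neither in the request nor in `generation_config.json`, it defaults to `40`. Set to `-1` to consider all tokens. |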
 | repetition_penalty | ✅ | ❌ | ✅ | float (default: `1.0`) | Penalizes new tokens based on whether they appear in the prompt and the generated text so far. Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat tokens. `1.0` means no penalty. |
 | frequency_penalty | ✅ | ✅ | ✅ | float (default: `0.0`) | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. |
 | presence_penalty | ✅ | ✅ | ✅ | float (default: `0.0`) | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. |
-| seed | ✅ | ✅ | ✅ | integer (default: `0`) | Random seed to use for the generation. |
+| seed | ✅ | ✅ | ✅ | integer (default: random) | Random seed for generation in range `[0, 4294967295]`. Omit to use a random seed (non-deterministic). Set explicitly to get reproducible output. Note: `rng_seed` set in `generation_config.json` is not honoured for multinomial sampling — only a per-request seed is applied. |
 
 #### Speculative decoding specific
 
@@ -275,7 +276,6 @@ If any of those parameters is not specified and request is made to Prompt Lookup
 - functions
 
 #### Unsupported params from vLLM:
-- min_p
 - use_beam_search (**In OpenVINO Model Server just simply increase _best_of_ param to enable beam search**)
 - early_stopping
 - stop_token_ids
diff --git a/docs/model_server_rest_api_completions.md b/docs/model_server_rest_api_completions.md
index 5089d068b1..9b8bb81b01 100644
--- a/docs/model_server_rest_api_completions.md
+++ b/docs/model_server_rest_api_completions.md
@@ -76,11 +76,12 @@ curl http://localhost/v3/completions \
 |-------|----------|----------|----------|---------|-----|
 | temperature | ✅ | ✅ | ✅ | float (default: `1.0`) | The value is used to modulate token probabilities for multinomial sampling. It enables multinomial sampling when set to `> 0.0`. |
 | top_p | ✅ | ✅ | ✅ | float (default: `1.0`) | Controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to 1 to consider all tokens. |
-| top_k | ✅ | ❌ | ✅ | int (default: all tokens) | Controls the number of top tokens to consider. Set to empty or -1 to consider all tokens. |
+| min_p | ✅ | ❌ | ✅ | float (default: `0.0`) | Minimum probability threshold relative to the most likely token. Tokens with probability below `min_p` × the top token probability are filtered out. `0.0` (default) disables the filter. Typical values: `0.05`–`0.1`. Must be in `[0.0, 1.0)`. |
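+| top_k | ✅ | ❌ | ✅ | int (default: `40`) | Controls the number of top tokens to consider. Must be `-1` or a positive integer; `0` and other negative values are rejected. When multinomial sampling is active and `top_k` is set neither in the request nor in `generation_config.json`, it defaults to `40`. Set to `-1` to consider all tokens. |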
 | repetition_penalty | ✅ | ❌ | ✅ | float (default: `1.0`) | Penalizes new tokens based on whether they appear in the prompt and the generated text so far. Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat tokens. `1.0` means no penalty. |
 | frequency_penalty | ✅ | ✅ | ✅ | float (default: `0.0`) | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. |
 | presence_penalty | ✅ | ✅ | ✅ | float (default: `0.0`) | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. |
-| seed | ✅ | ✅ | ✅ | integer (default: `0`) | Random seed to use for the generation. |
+| seed | ✅ | ✅ | ✅ | integer (default: random) | Random seed for generation in range `[0, 4294967295]`. Omit to use a random seed (non-deterministic). Set explicitly to get reproducible output. Note: `rng_seed` set in `generation_config.json` is not honoured for multinomial sampling — only a per-request seed is applied. |
 
 #### Speculative decoding specific
 
@@ -106,7 +107,6 @@ Note that below parameters are valid only for prompt lookup pipeline. Add `"prom
 
 
 #### Unsupported params from vLLM:
-- min_p
 - use_beam_search (**In OpenVINO Model Server just simply increase _best_of_ param to enable beam search**)
 - early_stopping
 - stop_token_ids
diff --git a/docs/model_server_rest_api_responses.md b/docs/model_server_rest_api_responses.md
index c27a643813..6698a47efe 100644
--- a/docs/model_server_rest_api_responses.md
+++ b/docs/model_server_rest_api_responses.md
@@ -120,11 +120,12 @@ curl http://localhost/v3/responses \
 |-------|----------|----------|---------|-----|
 | temperature | ✅ | ✅ | float (default: `1.0`) | The value is used to modulate token probabilities for multinomial sampling. It enables multinomial sampling when set to `> 0.0`. |
 | top_p | ✅ | ✅ | float (default: `1.0`) | Controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to 1 to consider all tokens. |
-| top_k | ✅ | ❌ | int (default: all tokens) | Controls the number of top tokens to consider. Set to empty or -1 to consider all tokens. |
+| min_p | ✅ | ❌ | float (default: `0.0`) | Minimum probability threshold relative to the most likely token. Tokens with probability below `min_p` × the top token probability are filtered out. `0.0` (default) disables the filter. Typical values: `0.05`–`0.1`. Must be in `[0.0, 1.0)`. |
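+| top_k | ✅ | ❌ | int (default: `40`) | Controls the number of top tokens to consider. Must be `-1` or a positive integer; `0` and other negative values are rejected. When multinomial sampling is active and `top_k` is set neither in the request nor in `generation_config.json`, it defaults to `40`. Set to `-1` to consider all tokens. |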
 | repetition_penalty | ✅ | ❌ | float (default: `1.0`) | Penalizes new tokens based on whether they appear in the prompt and the generated text so far. Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat tokens. `1.0` means no penalty. |
 | frequency_penalty | ✅ | ❌ | float (default: `0.0`) | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. |
 | presence_penalty | ✅ | ❌ | float (default: `0.0`) | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. |
-| seed | ✅ | ❌ | integer (default: `0`) | Random seed to use for the generation. |
+| seed | ✅ | ❌ | integer (default: random) | Random seed for generation in range `[0, 4294967295]`. Omit to use a random seed (non-deterministic). Set explicitly to get reproducible output. Note: `rng_seed` set in `generation_config.json` is not honoured for multinomial sampling — only a per-request seed is applied. |
 
 #### Speculative decoding specific
 
diff --git a/src/llm/apis/openai_api_handler.cpp b/src/llm/apis/openai_api_handler.cpp
index dc7b9031d8..0647807948 100644
--- a/src/llm/apis/openai_api_handler.cpp
+++ b/src/llm/apis/openai_api_handler.cpp
@@ -740,21 +740,48 @@ absl::Status OpenAIApiHandler::parseCommonPart(std::optional<uint32_t> maxTokens
             return absl::InvalidArgumentError("top_p out of range(0.0, 1.0)");
     }
 
-    // top_k: int; optional - defaults to 0
-    // Extension, unsupported by OpenAI API, however supported by vLLM and CB lib
+    // min_p: float; optional - defaults to 0 (disabled)
+    // Extension, unsupported by OpenAI API, however supported by vLLM and GenAI
+    it = doc.FindMember("min_p");
+    if (it != doc.MemberEnd() && !it->value.IsNull()) {
+        if (!it->value.IsDouble() && !it->value.IsInt())
+            return absl::InvalidArgumentError("min_p is not a valid number");
+        const float minPValue = static_cast<float>(it->value.GetDouble());
+        if (minPValue < 0.0f || minPValue >= 1.0f)
+            return absl::InvalidArgumentError("min_p out of range [0.0, 1.0)");
+        request.minP = minPValue;
+    }
+
+    // top_k: int; optional - when multinomial sampling is active, defaults to 40 if not set. Pass -1 to consider all tokens.
+    // Extension, unsupported by OpenAI API, however supported by vLLM and GenAI
     it = doc.FindMember("top_k");
     if (it != doc.MemberEnd() && !it->value.IsNull()) {
         if (!it->value.IsInt())
             return absl::InvalidArgumentError("top_k is not an integer");
-        request.topK = it->value.GetInt();
+        const int topKValue = it->value.GetInt();
+        if (topKValue < -1 || topKValue == 0)
+            return absl::InvalidArgumentError("top_k must be -1 (all tokens) or a positive integer");
+        request.topK = topKValue;
     }
 
-    // seed: int; optional - defaults to 0 (not set)
+    // seed: uint32; optional - omit to use a random seed
     it = doc.FindMember("seed");
     if (it != doc.MemberEnd() && !it->value.IsNull()) {
-        if (!it->value.IsUint())
-            return absl::InvalidArgumentError("seed is not an unsigned integer");
-        request.seed = it->value.GetUint();
+        if (!it->value.IsInt() && !it->value.IsUint() && !it->value.IsInt64() && !it->value.IsUint64())
+            return absl::InvalidArgumentError("seed is not an integer");
+        if (it->value.IsUint64()) {
+            const uint64_t raw = it->value.GetUint64();
+            if (raw > std::numeric_limits<uint32_t>::max())
+                return absl::InvalidArgumentError("seed out of range [0, 4294967295]");
+            request.seed = static_cast<uint32_t>(raw);
+        } else if (it->value.IsUint()) {
+            request.seed = it->value.GetUint();
+        } else {
+            const int64_t raw = it->value.GetInt64();
+            if (raw < 0 || raw > static_cast<int64_t>(std::numeric_limits<uint32_t>::max()))
+                return absl::InvalidArgumentError("seed out of range [0, 4294967295]");
+            request.seed = static_cast<uint32_t>(raw);
+        }
     }
 
     // stop: string or array; optional - defaults to null (not set)
diff --git a/src/llm/apis/openai_request.hpp b/src/llm/apis/openai_request.hpp
index 8ffef1da1d..24327be44f 100644
--- a/src/llm/apis/openai_request.hpp
+++ b/src/llm/apis/openai_request.hpp
@@ -17,6 +17,7 @@
 // Type that holds vector of pairs where first element is chat turn index and second is image tensor
 // this way we store information about which image is associated with which chat turn
 #pragma once
+#include <cstdint>
 #include <map>
 #include <optional>
 #include <string>
@@ -57,8 +58,9 @@ struct OpenAIRequest {
     // Multinomial decoding specific
     std::optional<float> temperature{std::nullopt};
     std::optional<float> topP{std::nullopt};
+    std::optional<float> minP{std::nullopt};
     std::optional<int> topK{std::nullopt};
-    std::optional<int> seed{std::nullopt};
+    std::optional<uint32_t> seed{std::nullopt};
     std::optional<float> frequencyPenalty{std::nullopt};
     std::optional<float> presencePenalty{std::nullopt};
     std::optional<float> repetitionPenalty{std::nullopt};
diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index 49703c0fc2..60ec1c4f08 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -406,6 +406,10 @@ void OpenAIResponsesHandler::serializeCommonResponseParameters(Writer<StringBuff
         writer.String("top_p");
         writer.Double(static_cast<double>(request.topP.value()));
     }
+    if (request.minP.has_value()) {
+        writer.String("min_p");
+        writer.Double(static_cast<double>(request.minP.value()));
+    }
     writer.String("truncation");
     writer.String("disabled");
     // TODO: user not supported
diff --git a/src/llm/io_processing/base_generation_config_builder.cpp b/src/llm/io_processing/base_generation_config_builder.cpp
index 1829f93b8d..f5cb2cacdb 100644
--- a/src/llm/io_processing/base_generation_config_builder.cpp
+++ b/src/llm/io_processing/base_generation_config_builder.cpp
@@ -16,6 +16,7 @@
 
 #include "../../logging.hpp"
 #include <limits>
+#include <random>
 #include <string>
 #include <openvino/genai/generation_config.hpp>
 #include "base_generation_config_builder.hpp"
@@ -118,9 +119,11 @@ void BaseGenerationConfigBuilder::parseConfigFromRequest(const OpenAIRequest& re
     if (request.temperature.has_value())
         config.temperature = request.temperature.value();
     if (request.topK.has_value())
-        config.top_k = request.topK.value();
+        config.top_k = (request.topK.value() == -1) ? std::numeric_limits<size_t>::max() : static_cast<size_t>(request.topK.value());
     if (request.topP.has_value())
         config.top_p = request.topP.value();
+    if (request.minP.has_value())
+        config.min_p = request.minP.value();
     if (request.seed.has_value())
         config.rng_seed = request.seed.value();
     if (request.stop.has_value())
@@ -133,6 +136,26 @@ void BaseGenerationConfigBuilder::parseConfigFromRequest(const OpenAIRequest& re
         config.presence_penalty = request.presencePenalty.value();
     config.do_sample = config.temperature > 0.0f && config.num_beams == 1;
 
+    // Apply multinomial sampling defaults when not explicitly set
+    if (config.do_sample) {
+        if (!request.topK.has_value() && config.top_k == std::numeric_limits<size_t>::max()) {
+            config.top_k = 40;
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Defaulting top_k to 40 for multinomial sampling.");
+        }
+        // Use random seed for multinomial sampling to ensure non-deterministic behavior by default.
+        // Note: rng_seed from generation_config.json is not honoured — only an explicit per-request
+        // seed produces deterministic output.
+        // Use a thread_local mt19937 seeded once via std::random_device to avoid per-request overhead.
+        if (!request.seed.has_value()) {
+            static thread_local std::mt19937 rng{std::random_device{}()};
+            size_t seed = 0;
+            while (seed == 0)
+                seed = rng();
+            config.rng_seed = seed;
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Randomizing rng_seed for multinomial sampling: {}.", config.rng_seed);
+        }
+    }
+
     if (request.logprobschat || request.logprobs)
         config.logprobs = 1;
     // Assisted decoding specific
diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp
index ecd1d598d7..0f23258006 100644
--- a/src/test/llm/llmnode_test.cpp
+++ b/src/test/llm/llmnode_test.cpp
@@ -18,6 +18,7 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <regex>
 #include <sstream>
 #include <string>
@@ -39,6 +40,7 @@
 #include "../../http_status_code.hpp"
 #include "../../json_parser.hpp"
 #include "../../llm/apis/openai_completions.hpp"
+#include "../../llm/io_processing/base_generation_config_builder.hpp"
 #include "../../llm/language_model/continuous_batching/llm_executor.hpp"
 #include "../../llm/language_model/continuous_batching/servable.hpp"
 #include "../../llm/servable.hpp"
@@ -370,13 +372,6 @@ TEST_P(LLMFlowHttpTestParameterized, streamCompletionsEchoWithCompletion) {
         ASSERT_EQ(d["choices"].Capacity(), 1);
         int i = 0;
         for (auto& choice : d["choices"].GetArray()) {
-            if (params.checkFinishReason) {
-                if (choice["finish_reason"].IsString()) {
-                    EXPECT_STREQ(choice["finish_reason"].GetString(), "length");
-                } else {
-                    ASSERT_TRUE(choice["finish_reason"].IsNull());
-                }
-            }
             ASSERT_EQ(choice["index"], i++);
             if (params.checkLogprobs) {
                 ASSERT_FALSE(choice["logprobs"].IsObject());
@@ -392,10 +387,11 @@ TEST_P(LLMFlowHttpTestParameterized, streamCompletionsEchoWithCompletion) {
         handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
         ovms::StatusCode::PARTIAL_END);
 
-    // Since prompt is treated as a single entity and streamer returns chunk only after space or newline
-    // we expect chunk with echoed prompt to contain space or new line at the end
-    ASSERT_TRUE(chunks[0] == "What is OpenVINO?\n" || chunks[0] == "What is OpenVINO? ");
     ASSERT_GT(chunks.size(), 1);
+    std::string combined;
+    for (const auto& chunk : chunks)
+        combined += chunk;
+    EXPECT_EQ(combined.rfind("What is OpenVINO?", 0), 0) << "Expected output to start with echoed prompt, got: " << combined;
 }
 
 TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonEchoOnly) {
@@ -3252,6 +3248,79 @@ TEST_P(LLMHttpParametersValidationTest, topKInvalid) {
         ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
 }
 
+TEST_P(LLMHttpParametersValidationTest, topKMinusOneValid) {
+    auto params = GetParam();
+    // -1 is the sentinel for "consider all tokens" and must pass request validation
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "top_k", "-1");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+}
+
+TEST_P(LLMHttpParametersValidationTest, topKZeroInvalid) {
+    auto params = GetParam();
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "top_k", "0");
+    // Zero is rejected: top_k must be -1 (all tokens) or a positive integer.
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_P(LLMHttpParametersValidationTest, topKNegativeInvalid) {
+    auto params = GetParam();
+    // Only -1 (the "all tokens" sentinel) is a valid negative value; other negatives such as -2 must be rejected
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "top_k", "-2");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_P(LLMHttpParametersValidationTest, minPValid) {
+    auto params = GetParam();
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "min_p", "0.05");
+    // A fractional value inside [0.0, 1.0) is accepted.
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+    // The integer literal 0 is accepted as well: it is the inclusive lower bound of the range.
+    requestBody = validRequestBodyWithParameter(params.modelName, "min_p", "0");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+}
+
+TEST_P(LLMHttpParametersValidationTest, minPInvalid) {
+    auto params = GetParam();
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "min_p", "\"INVALID\"");
+    // A JSON string is not a number, so min_p validation is expected to fail.
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_P(LLMHttpParametersValidationTest, minPOutOfRange) {
+    auto params = GetParam();
+    // min_p must be in [0.0, 1.0) — the upper bound is exclusive, so exactly 1.0 is out of range
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "min_p", "1.0");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_P(LLMHttpParametersValidationTest, minPNegative) {
+    auto params = GetParam();
+    // min_p must be in [0.0, 1.0) — any negative value lies below the inclusive lower bound
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "min_p", "-0.1");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
 TEST_P(LLMHttpParametersValidationTest, seedValid) {
     auto params = GetParam();
     std::string requestBody = validRequestBodyWithParameter(params.modelName, "seed", "1");
@@ -3270,6 +3339,44 @@ TEST_P(LLMHttpParametersValidationTest, seedInvalid) {
         ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
 }
 
+TEST_P(LLMHttpParametersValidationTest, seedBoundaryZero) {
+    auto params = GetParam();
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "seed", "0");
+    // 0 is the smallest value of the accepted range [0, 4294967295] and must be accepted.
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+}
+
+TEST_P(LLMHttpParametersValidationTest, seedBoundaryMax) {
+    auto params = GetParam();
+    // Maximum valid seed: 2^32 - 1 = 4294967295, the largest value representable as uint32_t
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "seed", "4294967295");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+}
+
+TEST_P(LLMHttpParametersValidationTest, seedOutOfRangeNegative) {
+    auto params = GetParam();
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "seed", "-1");
+    // Negative seeds fall outside [0, 4294967295] and are expected to be rejected.
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_P(LLMHttpParametersValidationTest, seedOutOfRangeOverflow) {
+    auto params = GetParam();
+    // 2^32 = 4294967296 is one past the maximum valid seed, so it no longer fits in uint32_t
+    std::string requestBody = validRequestBodyWithParameter(params.modelName, "seed", "4294967296");
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
 TEST_P(LLMHttpParametersValidationTest, bestOfValid) {
     auto params = GetParam();
     std::string requestBody = validRequestBodyWithParameter(params.modelName, "best_of", "1");
@@ -4614,3 +4721,72 @@ TEST_F(IsolatedServableTests, PromtSizeBetweenDefaultAndNonDefaultMaxPromptLenNP
 }
 
 // TODO: Add missing tests for reading max prompt len property from configuration
+
+// Unit tests for BaseGenerationConfigBuilder multinomial sampling defaults
+
+TEST(BaseGenerationConfigBuilderTest, TopKDefaultedTo40WhenSamplingEnabled) {
+    ov::genai::GenerationConfig baseConfig;
+    BaseGenerationConfigBuilder builder{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    OpenAIRequest request;
+    request.temperature = 1.0f;  // enables do_sample; topK not set
+    builder.parseConfigFromRequest(request);
+    EXPECT_EQ(builder.getConfig().top_k, 40u);  // the builder is expected to fall back to the sampling default
+}
+
+TEST(BaseGenerationConfigBuilderTest, TopKPreservedWhenExplicitlySet) {
+    ov::genai::GenerationConfig baseConfig;
+    BaseGenerationConfigBuilder builder{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    OpenAIRequest request;
+    request.temperature = 1.0f;  // enables do_sample
+    request.topK = 10;  // an explicit request value must win over the sampling default of 40
+    builder.parseConfigFromRequest(request);
+    EXPECT_EQ(builder.getConfig().top_k, 10u);
+}
+
+TEST(BaseGenerationConfigBuilderTest, TopKMinusOneMapsToInactive) {
+    ov::genai::GenerationConfig baseConfig;
+    BaseGenerationConfigBuilder builder{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    OpenAIRequest request;
+    request.temperature = 1.0f;  // enables do_sample
+    request.topK = -1;  // sentinel: consider all tokens
+    builder.parseConfigFromRequest(request);
+    EXPECT_EQ(builder.getConfig().top_k, std::numeric_limits<size_t>::max());  // -1 is translated to the maximum size_t value, not replaced by 40
+}
+
+TEST(BaseGenerationConfigBuilderTest, TopKNotChangedWhenSamplingDisabled) {
+    ov::genai::GenerationConfig baseConfig;
+    BaseGenerationConfigBuilder builder{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    OpenAIRequest request;
+    request.temperature = 0.0f;  // greedy decoding, do_sample = false
+    builder.parseConfigFromRequest(request);
+    EXPECT_EQ(builder.getConfig().top_k, std::numeric_limits<size_t>::max());  // the default of 40 is only applied when do_sample is true
+}
+
+TEST(BaseGenerationConfigBuilderTest, SeedRandomizedWhenOmittedDuringSampling) {
+    ov::genai::GenerationConfig baseConfig;
+    OpenAIRequest request;
+    request.temperature = 1.0f;  // enables do_sample; seed not set → must be randomized
+
+    // Parse the same request twice — seeds must differ (non-deterministic per request)
+    BaseGenerationConfigBuilder builder1{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    builder1.parseConfigFromRequest(request);
+    const size_t seed1 = builder1.getConfig().rng_seed;
+
+    BaseGenerationConfigBuilder builder2{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    builder2.parseConfigFromRequest(request);
+    const size_t seed2 = builder2.getConfig().rng_seed;
+    // Generated seeds are never 0. NOTE(review): two consecutive draws could in theory be equal, though this is extremely unlikely.
+    EXPECT_NE(seed1, 0u);
+    EXPECT_NE(seed2, 0u);
+    EXPECT_NE(seed1, seed2) << "Expected different seeds for successive omitted-seed requests";
+}
+
+TEST(BaseGenerationConfigBuilderTest, SeedPreservedWhenExplicitlySet) {
+    ov::genai::GenerationConfig baseConfig;
+    BaseGenerationConfigBuilder builder{baseConfig, /*enableToolGuidedGeneration=*/false, DecodingMethod::STANDARD};
+    OpenAIRequest request;
+    request.temperature = 1.0f;  // enables do_sample
+    request.seed = 42u;  // an explicit seed must be passed through instead of being randomized
+    builder.parseConfigFromRequest(request);
+    EXPECT_EQ(builder.getConfig().rng_seed, 42u);
+}