Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/envvars.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,25 @@ Runtime Environment Variables

These environment variables control the behavior of Transformer Engine during execution.

General
^^^^^^^

.. envvar:: NVTE_TENSOR_HANDLE_POOL_SIZE_MB

:Type: ``int`` (positive integer)
:Default: ``20``
:Description: Size in MiB of the internal ``NVTETensor`` handle pool. Increase this
value if an application legitimately creates more tensor handles than
the default pool can hold.

.. envvar:: NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB

:Type: ``int`` (positive integer)
:Default: ``20``
:Description: Size in MiB of the internal ``NVTEGroupedTensor`` handle pool. Increase
this value if an application legitimately creates more grouped tensor
handles than the default pool can hold.

Attention Backend Selection
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
48 changes: 41 additions & 7 deletions transformer_engine/common/transformer_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <climits>
#include <cstring>
#include <iostream>
#include <limits>
#include <mutex>
#include <optional>
#include <string>
Expand All @@ -21,6 +22,7 @@
#include "common.h"
#include "common/util/cuda_runtime.h"
#include "common/util/logging.h"
#include "common/util/system.h"

namespace transformer_engine {

Expand Down Expand Up @@ -393,6 +395,29 @@ void CheckOutputGroupedTensor(const GroupedTensor &t, std::string_view name, boo
CheckGroupedTensorShapeArrays(t, name);
}

namespace {

// Handle pool size in MiB that is passed to getenv<size_t> as the default value
// when reading the pool-size environment variables.
constexpr size_t kDefaultTensorHandlePoolSizeMB = 20;
// Number of bytes in one MiB; used to convert a pool size in MiB to bytes.
constexpr size_t kBytesPerMB = 1024 * 1024;

// Reads a handle pool size, in MiB, from the environment variable named `env_var`,
// passing kDefaultTensorHandlePoolSizeMB to getenv<size_t> as the default.
// Fails via NVTE_CHECK if the parsed value is zero, or if it is so large that
// multiplying it by kBytesPerMB would not fit in size_t. Returns the size in MiB.
// NOTE(review): the review comments embedded in this span report that getenv<size_t>
// accepts trailing garbage (e.g. "50bad" -> 50) and that negative input wraps to a huge
// unsigned value, producing the "too large" message -- confirm against getenv_helper.
size_t GetTensorHandlePoolSizeMB(const char *env_var) {
const size_t pool_size_mb = getenv<size_t>(env_var, kDefaultTensorHandlePoolSizeMB);
NVTE_CHECK(pool_size_mb > 0, env_var, " must be a positive integer.");
// Upper bound chosen so that pool_size_mb * kBytesPerMB cannot overflow size_t.
NVTE_CHECK(pool_size_mb <= std::numeric_limits<size_t>::max() / kBytesPerMB, env_var,
" is too large.");
Comment on lines +404 to +407
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Partial string values accepted silently

getenv_helper for numeric types calls iss >> value without checking iss.eof() afterwards, so an env var like NVTE_TENSOR_HANDLE_POOL_SIZE_MB=50bad will parse successfully as 50 — the trailing non-numeric characters are silently ignored. The subsequent range checks in GetTensorHandlePoolSizeMB only reject zero and overflow; they won't catch this case. Consider calling std::getenv directly here and using std::stoull with an idx argument (or verifying iss.eof()) to ensure the entire string is consumed before calling the generic getenv<size_t> wrapper.

Comment on lines +405 to +407
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Negative inputs produce a misleading "too large" error

getenv<size_t> parses the env-var string into an unsigned size_t. On virtually all implementations, a negative string like -1 wraps around to SIZE_MAX without setting failbit, so the pool_size_mb > 0 check (designed to catch zero/negative intent) is bypassed, and the overflow guard fires instead with the message "… is too large." — not the intended "… must be a positive integer." The docs describe the type as int (positive integer), so users who supply a negative value will see a confusing diagnostic.

return pool_size_mb;
}

// Converts a pool size in MiB into the number of handles that fit in the pool.
//   pool_size_mb: pool size in MiB.
//   handle_size:  size of a single handle in bytes.
//   handle_name:  handle type name, used only in the error message.
//   env_var:      name of the controlling environment variable, used only in the error message.
// Fails via NVTE_CHECK when the pool is smaller than one handle.
// Returns pool size in bytes divided by handle_size.
size_t GetTensorHandlePoolCapacity(size_t pool_size_mb, size_t handle_size, const char *handle_name,
const char *env_var) {
// No overflow check here: this multiplication relies on the caller having bounded
// pool_size_mb, as GetTensorHandlePoolSizeMB does.
const size_t pool_size_bytes = pool_size_mb * kBytesPerMB;
NVTE_CHECK(pool_size_bytes >= handle_size, env_var, "=", pool_size_mb,
" MiB is too small for one ", handle_name, " handle of size ", handle_size, " bytes.");
// Integer division: a remainder smaller than one handle is not counted.
return pool_size_bytes / handle_size;
}

} // namespace

class TensorAllocator {
public:
static TensorAllocator &instance() {
Expand All @@ -406,8 +431,10 @@ class TensorAllocator {
std::lock_guard<std::mutex> lock(mutex);
const size_t available = free_list.size() + (memory.capacity() - memory.size());
NVTE_CHECK(available >= N, "Cannot allocate ", N,
" new NVTETensors. Maximum number of tensors reached: ", MAX_TENSOR_NUM,
". There is probably a memory leak in your application.");
" new NVTETensors. Maximum number of tensors reached: ", MAX_TENSOR_NUM, " (",
TENSOR_HANDLE_POOL_SIZE_MB,
" MiB handle pool). If your application legitimately needs more tensor handles, "
"increase NVTE_TENSOR_HANDLE_POOL_SIZE_MB.");
for (size_t i = 0; i < N; ++i) {
uintptr_t index;
if (!free_list.empty()) {
Expand Down Expand Up @@ -479,9 +506,11 @@ class TensorAllocator {

std::mutex mutex;
std::atomic<size_t> size;
// Allocate at most 20 MB for tensors
// Should be replaced by virtual memory allocation
const size_t MAX_TENSOR_NUM = 20 * 1024 * 1024 / sizeof(Tensor);
const size_t TENSOR_HANDLE_POOL_SIZE_MB =
GetTensorHandlePoolSizeMB("NVTE_TENSOR_HANDLE_POOL_SIZE_MB");
const size_t MAX_TENSOR_NUM = GetTensorHandlePoolCapacity(
TENSOR_HANDLE_POOL_SIZE_MB, sizeof(Tensor), "NVTETensor", "NVTE_TENSOR_HANDLE_POOL_SIZE_MB");
std::vector<uintptr_t> free_list;
std::vector<Tensor> memory;
bool debug = false;
Expand Down Expand Up @@ -532,7 +561,9 @@ class GroupedTensorAllocator {
}
NVTE_ERROR(
"Cannot allocate a new NVTEGroupedTensor. Maximum number of grouped tensors reached: ",
MAX_GROUPED_TENSOR_NUM, ". There is probably a memory leak in your application.");
MAX_GROUPED_TENSOR_NUM, " (", GROUPED_TENSOR_HANDLE_POOL_SIZE_MB,
" MiB handle pool). If your application legitimately needs more grouped tensor handles, "
"increase NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB.");
}

void Free(NVTEGroupedTensor t) {
Expand Down Expand Up @@ -564,8 +595,11 @@ class GroupedTensorAllocator {

std::mutex mutex;
std::atomic<size_t> size;
// Allocate at most 20 MB for grouped tensors
const size_t MAX_GROUPED_TENSOR_NUM = 20 * 1024 * 1024 / sizeof(GroupedTensor);
const size_t GROUPED_TENSOR_HANDLE_POOL_SIZE_MB =
GetTensorHandlePoolSizeMB("NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB");
const size_t MAX_GROUPED_TENSOR_NUM =
GetTensorHandlePoolCapacity(GROUPED_TENSOR_HANDLE_POOL_SIZE_MB, sizeof(GroupedTensor),
"NVTEGroupedTensor", "NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB");
std::vector<uintptr_t> free_list;
std::vector<GroupedTensor> memory;
};
Expand Down
Loading