-
Notifications
You must be signed in to change notification settings - Fork 741
Make NVTE tensor handle pool size configurable #3090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ | |
| #include <climits> | ||
| #include <cstring> | ||
| #include <iostream> | ||
| #include <limits> | ||
| #include <mutex> | ||
| #include <optional> | ||
| #include <string> | ||
|
|
@@ -21,6 +22,7 @@ | |
| #include "common.h" | ||
| #include "common/util/cuda_runtime.h" | ||
| #include "common/util/logging.h" | ||
| #include "common/util/system.h" | ||
|
|
||
| namespace transformer_engine { | ||
|
|
||
|
|
@@ -393,6 +395,29 @@ void CheckOutputGroupedTensor(const GroupedTensor &t, std::string_view name, boo | |
| CheckGroupedTensorShapeArrays(t, name); | ||
| } | ||
|
|
||
| namespace { | ||
|
|
||
| constexpr size_t kDefaultTensorHandlePoolSizeMB = 20; | ||
| constexpr size_t kBytesPerMB = 1024 * 1024; | ||
|
|
||
| size_t GetTensorHandlePoolSizeMB(const char *env_var) { | ||
| const size_t pool_size_mb = getenv<size_t>(env_var, kDefaultTensorHandlePoolSizeMB); | ||
| NVTE_CHECK(pool_size_mb > 0, env_var, " must be a positive integer."); | ||
| NVTE_CHECK(pool_size_mb <= std::numeric_limits<size_t>::max() / kBytesPerMB, env_var, | ||
| " is too large."); | ||
|
Comment on lines
+405
to
+407
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| return pool_size_mb; | ||
| } | ||
|
|
||
| /* Returns how many handles of handle_size bytes fit in a pool of pool_size_mb MiB; handle_name and env_var are only used to build the error message. */ size_t GetTensorHandlePoolCapacity(size_t pool_size_mb, size_t handle_size, const char *handle_name, | ||
| const char *env_var) { | ||
| const size_t pool_size_bytes = pool_size_mb * kBytesPerMB; /* the callers visible here pass a value returned by GetTensorHandlePoolSizeMB, whose upper-bound check keeps this product within size_t */ | ||
| NVTE_CHECK(pool_size_bytes >= handle_size, env_var, "=", pool_size_mb, /* reject a pool that cannot hold even one handle */ | ||
| " MiB is too small for one ", handle_name, " handle of size ", handle_size, " bytes."); | ||
| return pool_size_bytes / handle_size; /* integer division: a remainder smaller than one handle is not counted */ | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| class TensorAllocator { | ||
| public: | ||
| static TensorAllocator &instance() { | ||
|
|
@@ -406,8 +431,10 @@ class TensorAllocator { | |
| std::lock_guard<std::mutex> lock(mutex); | ||
| const size_t available = free_list.size() + (memory.capacity() - memory.size()); | ||
| NVTE_CHECK(available >= N, "Cannot allocate ", N, | ||
| " new NVTETensors. Maximum number of tensors reached: ", MAX_TENSOR_NUM, | ||
| ". There is probably a memory leak in your application."); | ||
| " new NVTETensors. Maximum number of tensors reached: ", MAX_TENSOR_NUM, " (", | ||
| TENSOR_HANDLE_POOL_SIZE_MB, | ||
| " MiB handle pool). If your application legitimately needs more tensor handles, " | ||
| "increase NVTE_TENSOR_HANDLE_POOL_SIZE_MB."); | ||
| for (size_t i = 0; i < N; ++i) { | ||
| uintptr_t index; | ||
| if (!free_list.empty()) { | ||
|
|
@@ -479,9 +506,11 @@ class TensorAllocator { | |
|
|
||
| std::mutex mutex; | ||
| std::atomic<size_t> size; | ||
| // Allocate at most 20 MB for tensors | ||
| // Should be replaced by virtual memory allocation | ||
| const size_t MAX_TENSOR_NUM = 20 * 1024 * 1024 / sizeof(Tensor); | ||
| const size_t TENSOR_HANDLE_POOL_SIZE_MB = | ||
| GetTensorHandlePoolSizeMB("NVTE_TENSOR_HANDLE_POOL_SIZE_MB"); | ||
| const size_t MAX_TENSOR_NUM = GetTensorHandlePoolCapacity( | ||
| TENSOR_HANDLE_POOL_SIZE_MB, sizeof(Tensor), "NVTETensor", "NVTE_TENSOR_HANDLE_POOL_SIZE_MB"); | ||
| std::vector<uintptr_t> free_list; | ||
| std::vector<Tensor> memory; | ||
| bool debug = false; | ||
|
|
@@ -532,7 +561,9 @@ class GroupedTensorAllocator { | |
| } | ||
| NVTE_ERROR( | ||
| "Cannot allocate a new NVTEGroupedTensor. Maximum number of grouped tensors reached: ", | ||
| MAX_GROUPED_TENSOR_NUM, ". There is probably a memory leak in your application."); | ||
| MAX_GROUPED_TENSOR_NUM, " (", GROUPED_TENSOR_HANDLE_POOL_SIZE_MB, | ||
| " MiB handle pool). If your application legitimately needs more grouped tensor handles, " | ||
| "increase NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB."); | ||
| } | ||
|
|
||
| void Free(NVTEGroupedTensor t) { | ||
|
|
@@ -564,8 +595,11 @@ class GroupedTensorAllocator { | |
|
|
||
| std::mutex mutex; | ||
| std::atomic<size_t> size; | ||
| // Allocate at most 20 MB for grouped tensors | ||
| const size_t MAX_GROUPED_TENSOR_NUM = 20 * 1024 * 1024 / sizeof(GroupedTensor); | ||
| const size_t GROUPED_TENSOR_HANDLE_POOL_SIZE_MB = | ||
| GetTensorHandlePoolSizeMB("NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB"); | ||
| const size_t MAX_GROUPED_TENSOR_NUM = | ||
| GetTensorHandlePoolCapacity(GROUPED_TENSOR_HANDLE_POOL_SIZE_MB, sizeof(GroupedTensor), | ||
| "NVTEGroupedTensor", "NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB"); | ||
| std::vector<uintptr_t> free_list; | ||
| std::vector<GroupedTensor> memory; | ||
| }; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`getenv_helper` for numeric types calls `iss >> value` without checking `iss.eof()` afterwards, so an env var like `NVTE_TENSOR_HANDLE_POOL_SIZE_MB=50bad` will parse successfully as `50` — the trailing non-numeric characters are silently ignored. The subsequent range checks in `GetTensorHandlePoolSizeMB` only reject zero and overflow; they won't catch this case. Consider calling `std::getenv` directly here and using `std::stoull` with an `idx` argument (or verifying `iss.eof()`) to ensure the entire string is consumed before calling the generic `getenv<size_t>` wrapper.