Compatible with the function selected by cuGetProcAddress #32
Compatible with the function selected by cuGetProcAddress #32 — VincentLeeMax wants to merge 1 commit into tkestack:master from
Conversation
|
The problem may be the missing entry |
Thanks for replying. I think it's not the same problem as #20. Calling After adding some log to
CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion,
cuuint64_t flags) {
/*
 * Body of the hooked cuGetProcAddress (debug-instrumented variant).
 *
 * Flow: run the one-time setup steps, forward the lookup to the real driver
 * entry, and on success replace the returned pointer with the matching
 * entry from cuda_hooks_entry when the symbol name is one of the hooks.
 * Returns the CUresult produced by the forwarded cuGetProcAddress call.
 */
CUresult ret;
int i;
load_necessary_data();
/* Registration is skipped when a custom config path is in use; otherwise
 * pthread_once guarantees register_to_remote runs at most once. */
if (!is_custom_config_path()) {
pthread_once(&g_register_set, register_to_remote);
}
/* One-time initialization, again guarded by pthread_once. */
pthread_once(&g_init_set, initialization);
/* Debug-only probe: for "cuMemAlloc", query the real driver with the
 * hard-coded versions 3020 and 2000 and log the pointer each one yields,
 * alongside the pointer currently stored in cuda_library_entry.
 * The ret values assigned here are overwritten by the real call below;
 * *pfn is presumably overwritten as well when that call succeeds. */
if (!strcmp(symbol, "cuMemAlloc")) {
LOGGER(1, "%s call version: %d.", symbol, cudaVersion);
ret = CUDA_ENTRY_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
3020, flags);
/* NOTE(review): *pfn is a void * but is logged with %d; if LOGGER is
 * printf-style (not visible here) this should be %p - confirm. */
LOGGER(1, "cudaVersion 3020, cudaMalloc function ptr: %d.", *pfn);
ret = CUDA_ENTRY_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
2000, flags);
LOGGER(1, "cudaVersion 2000, cudaMalloc function ptr: %d.", *pfn);
entry_t entry = cuda_library_entry[CUDA_ENTRY_ENUM(cuMemAlloc)];
/* NOTE(review): entry.fn_ptr looks like a pointer logged with %d as
 * well - verify against the entry_t declaration. */
LOGGER(1, "in cuda_library_entry, %s function ptr: %d.", entry.name, entry.fn_ptr);
}
/* The actual lookup, using the version and flags the caller requested. */
ret = CUDA_ENTRY_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
cudaVersion, flags);
if (ret == CUDA_SUCCESS) {
/* Linear scan of the hook table; the first name match wins. */
for (i = 0; i < cuda_hook_nums; i++) {
if (!strcmp(symbol, cuda_hooks_entry[i].name)) {
LOGGER(5, "Match hook %s", symbol);
LOGGER(1, "%s call version: %d.", symbol, cudaVersion);
/* Hand the caller the hook instead of the driver's function. The match
 * is by symbol name only; cudaVersion is not consulted here. */
*pfn = cuda_hooks_entry[i].fn_ptr;
break;
}
}
}
return ret;
}
Since we redirect the function in |
|
@VincentLeeMax @mYmNeo According to the NVIDIA docs,
|
| for (i = 0; i < cuda_hook_nums; i++) { | ||
| if (!strcmp(symbol, cuda_hooks_entry[i].name)) { | ||
| LOGGER(5, "Match hook %s", symbol); | ||
| cuda_library_entry[cuda_hooks_entry[i].library_index].fn_ptr = *pfn; |
There was a problem hiding this comment.
We'd better match the function entry using both the symbol and the version.


When cuda==11.3, running with

https://github.com/NVIDIA/cuda-samples/tree/v11.3/Samples/reduction, `cudaMalloc` will hit a `cudaErrorDeviceUninitialized` error. Updating the corresponding `cuda_library_entry` function to the function returned by `cuGetProcAddress` fixes it.