// Patch summary: modifies five files under src/nvidia/src/kernel -- compute/fabric.c,
// core/hal_mgr.c, core/system.c, mem_mgr/console_mem.c and power/gpu_boost_mgr.c.
// NOTE(review): the hunks below appear with their line breaks collapsed; restore them before applying.
// NOTE(review): in gpuboostmgrGetBoostGrpIdFromGpu_IMPL the unchanged context writes *pBoostGrpId
// before the NULL assertion on pBoostGrpId; this is not addressed by this patch -- confirm intent.
diff --git a/src/nvidia/src/kernel/compute/fabric.c b/src/nvidia/src/kernel/compute/fabric.c index acd71f83f6..be4c52304f 100644 --- a/src/nvidia/src/kernel/compute/fabric.c +++ b/src/nvidia/src/kernel/compute/fabric.c @@ -892,12 +892,12 @@ fabricConstruct_IMPL return NV_OK; -//TODO: Remove the WAR to suppress unused label warning -goto fail; -fail: - fabricDestruct_IMPL(pFabric); - return status; -} +// NOTE(review): the 'goto fail;' WAR is removed but the fail: label is kept; confirm another jump targets it, otherwise the unused-label warning returns. + + fail: + fabricDestruct_IMPL(pFabric); + return status; + } void fabricDestruct_IMPL diff --git a/src/nvidia/src/kernel/core/hal_mgr.c b/src/nvidia/src/kernel/core/hal_mgr.c index 78e6c0d783..6968c63b76 100644 --- a/src/nvidia/src/kernel/core/hal_mgr.c +++ b/src/nvidia/src/kernel/core/hal_mgr.c @@ -77,11 +77,13 @@ halmgrCreateHal_IMPL NV_ASSERT_OR_RETURN(halImpl < HAL_IMPL_MAXIMUM, NV_ERR_INVALID_ARGUMENT); + // Guard against duplicate registration: storing a new object in an occupied pHalList slot would overwrite and leak the existing one. + NV_ASSERT_OR_RETURN(pHalMgr->pHalList[halImpl] == NULL, NV_ERR_INVALID_STATE); + status = objCreate(&pHal, pHalMgr, OBJHAL); if (status != NV_OK) return status; - // Store away the object pointer for this particular HAL object pHalMgr->pHalList[halImpl] = pHal; return NV_OK; diff --git a/src/nvidia/src/kernel/core/system.c b/src/nvidia/src/kernel/core/system.c index f828d6e396..a9a59dd4d5 100644 --- a/src/nvidia/src/kernel/core/system.c +++ b/src/nvidia/src/kernel/core/system.c @@ -861,6 +861,8 @@ sysSyncExternalFabricMgmtWAR_IMPL return status; } + +// Use a separate gpuLockMask for lock acquire and release so that the gpuMask handed to gpumgrGetGpuAttachInfo() cannot alter what is released and cause a lock imbalance. 
static void _sysRefreshAllGpuRecoveryAction ( @@ -872,10 +874,11 @@ _sysRefreshAllGpuRecoveryAction NvU32 i; NvU32 gpuCount; NvU32 gpuIndex; + NvU32 gpuLockMask; NvU32 gpuMask; NV_ASSERT_OK_OR_ELSE(status, - rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_ALL, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_NONE, &gpuMask), + rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_ALL, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_NONE, &gpuLockMask), return); gpumgrGetGpuAttachInfo(&gpuCount, &gpuMask); @@ -888,7 +891,7 @@ _sysRefreshAllGpuRecoveryAction } } - rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE); + rmGpuGroupLockRelease(gpuLockMask, GPUS_LOCK_FLAGS_NONE); } void diff --git a/src/nvidia/src/kernel/mem_mgr/console_mem.c b/src/nvidia/src/kernel/mem_mgr/console_mem.c index 97fe711ec2..eab8570087 100644 --- a/src/nvidia/src/kernel/mem_mgr/console_mem.c +++ b/src/nvidia/src/kernel/mem_mgr/console_mem.c @@ -72,8 +72,13 @@ conmemConstruct_IMPL } NV_ASSERT(pMemDesc->Allocated == 0); - memdescAddRef(pMemDesc); - pMemDesc->DupCount++; + + // NOTE(review): the AddRef call and DupCount increment are removed above, yet a DupCount decrement and memdescDestroy() run when status != NV_OK; confirm the counts stay balanced and that status is assigned before this check. + + if (status != NV_OK) { + pMemDesc->DupCount--; + memdescDestroy(pMemDesc); +} // // NV01_MEMORY_FRAMEBUFFER_CONSOLE is just a way to get at the reserved diff --git a/src/nvidia/src/kernel/power/gpu_boost_mgr.c b/src/nvidia/src/kernel/power/gpu_boost_mgr.c index 05920d4b39..99ef444926 100644 --- a/src/nvidia/src/kernel/power/gpu_boost_mgr.c +++ b/src/nvidia/src/kernel/power/gpu_boost_mgr.c @@ -547,6 +547,7 @@ gpuboostmgrGpuItr_IMPL * @return NV_ERR_OBJECT_NOT_FOUND if SGBG is not found * @return NV_ERR_INVALID_ARGUMENT Null or incorrect arguments passed in. 
*/ + NV_STATUS gpuboostmgrGetBoostGrpIdFromGpu_IMPL ( @@ -556,7 +557,7 @@ gpuboostmgrGetBoostGrpIdFromGpu_IMPL ) { NvU32 i; - NvU32 index = 0; + NvU32 index; OBJGPU *pGpuTemp = NULL; *pBoostGrpId = NV0000_SYNC_GPU_BOOST_INVALID_GROUP_ID; @@ -564,8 +565,12 @@ gpuboostmgrGetBoostGrpIdFromGpu_IMPL NV_ASSERT_OR_RETURN(NULL != pGpu, NV_ERR_INVALID_ARGUMENT); NV_ASSERT_OR_RETURN(NULL != pBoostGrpId, NV_ERR_INVALID_ARGUMENT); - for (i = 0; i < pBoostMgr->groupCount; i++) + for (i = 0; i < NV0000_SYNC_GPU_BOOST_MAX_GROUPS; i++) { + if (NV_OK != gpuboostmgrValidateGroupId(pBoostMgr, i)) + continue; + + index = 0; while (NULL != (pGpuTemp = gpuboostmgrGpuItr(pBoostMgr, i, &index))) { if (pGpuTemp->gpuId == pGpu->gpuId)