Skip to content

Commit 4fb808c

Browse files
committed
Use cached TemplateNodeInfos in DRA processor
The DraCustomResourcesProcessor now attempts to retrieve the NodeInfo from the TemplateNodeInfoRegistry before falling back to the NodeGroup's TemplateNodeInfo(). This ensures the processor uses the canonical TemplateNodeInfo for the current autoscaling loop. Crucially, this preserves any enrichments (such as custom DRA resource slices) that are computed during the registry's Recompute phase but might be absent in a fresh, raw template from the CloudProvider.
1 parent ebb761f commit 4fb808c

File tree

2 files changed

+71
-6
lines changed

2 files changed

+71
-6
lines changed

cluster-autoscaler/processors/customresources/dra_processor.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package customresources
1919
import (
2020
apiv1 "k8s.io/api/core/v1"
2121
resourceapi "k8s.io/api/resource/v1"
22+
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
2223

2324
"k8s.io/apimachinery/pkg/util/sets"
2425
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@@ -57,11 +58,21 @@ func (p *DraCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(autosca
5758
continue
5859
}
5960

60-
nodeInfo, err := ng.TemplateNodeInfo()
61-
if err != nil {
62-
newReadyNodes = append(newReadyNodes, node)
63-
klog.Warningf("Failed to get template node info for node group %s with error: %v", ng.Id(), err)
64-
continue
61+
var nodeInfo *framework.NodeInfo
62+
if autoscalingCtx.TemplateNodeInfoRegistry != nil {
63+
// Prefer the cached template from the registry. This template may contain enrichments (e.g.
64+
// custom DRA slices) that are not present in the raw CloudProvider template.
65+
if ni, found := autoscalingCtx.TemplateNodeInfoRegistry.GetNodeInfo(ng.Id()); found {
66+
nodeInfo = ni
67+
}
68+
}
69+
if nodeInfo == nil {
70+
nodeInfo, err = ng.TemplateNodeInfo()
71+
if err != nil {
72+
newReadyNodes = append(newReadyNodes, node)
73+
klog.Warningf("Failed to get template node info for node group %s with error: %v", ng.Id(), err)
74+
continue
75+
}
6576
}
6677

6778
nodeResourcesSlices, _ := draSnapshot.NodeResourceSlices(node.Name)

cluster-autoscaler/processors/customresources/dra_processor_test.go

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@ import (
2121
"testing"
2222
"time"
2323

24+
appsv1 "k8s.io/api/apps/v1"
2425
resourceapi "k8s.io/api/resource/v1"
2526
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store"
2627
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot"
2728
drasnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/snapshot"
2829
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
30+
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
31+
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
2932

3033
"github.com/stretchr/testify/assert"
3134
apiv1 "k8s.io/api/core/v1"
@@ -35,12 +38,36 @@ import (
3538
utils "k8s.io/autoscaler/cluster-autoscaler/utils/test"
3639
)
3740

41+
// mockTemplateNodeInfoRegistry is a test double that is plugged into
// AutoscalingContext.TemplateNodeInfoRegistry; it serves NodeInfos from a fixed in-memory map.
type mockTemplateNodeInfoRegistry struct {
42+
// nodeInfos is the lookup table used by GetNodeInfo; the test keys it by node group ID.
nodeInfos map[string]*framework.NodeInfo
43+
}
44+
45+
// newMockTemplateNodeInfoRegistry returns a mock registry backed by nodeInfos.
// The map is stored as-is, not copied. A nil map is acceptable: lookups on it
// report "not found".
func newMockTemplateNodeInfoRegistry(nodeInfos map[string]*framework.NodeInfo) *mockTemplateNodeInfoRegistry {
46+
return &mockTemplateNodeInfoRegistry{
47+
nodeInfos: nodeInfos,
48+
}
49+
}
50+
51+
// GetNodeInfo returns the NodeInfo stored under id and whether an entry exists.
// When no entry exists it returns (nil, false).
func (m *mockTemplateNodeInfoRegistry) GetNodeInfo(id string) (*framework.NodeInfo, bool) {
52+
nodeInfo, found := m.nodeInfos[id]
53+
return nodeInfo, found
54+
}
55+
56+
// GetNodeInfos returns the mock's backing map directly (no copy is made), so
// callers share it with the mock.
func (m *mockTemplateNodeInfoRegistry) GetNodeInfos() map[string]*framework.NodeInfo {
57+
return m.nodeInfos
58+
}
59+
60+
// Recompute is a no-op in the mock: all arguments are ignored, the stored
// NodeInfos are left untouched, and nil is always returned.
func (m *mockTemplateNodeInfoRegistry) Recompute(_ *ca_context.AutoscalingContext, _ []*apiv1.Node, _ []*appsv1.DaemonSet, _ taints.TaintConfig, _ time.Time) errors.AutoscalerError {
61+
return nil
62+
}
63+
3864
func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) {
3965
testCases := map[string]struct {
4066
nodeGroupsAllNodes map[string][]*apiv1.Node
4167
nodeGroupsTemplatesSlices map[string][]*resourceapi.ResourceSlice
4268
nodesSlices map[string][]*resourceapi.ResourceSlice
4369
expectedNodesReadiness map[string]bool
70+
registryNodeInfos map[string]*framework.NodeInfo
4471
}{
4572
"1 DRA node group all totally ready": {
4673
nodeGroupsAllNodes: map[string][]*apiv1.Node{
@@ -306,6 +333,29 @@ func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) {
306333
"node_7": true,
307334
},
308335
},
336+
"Custom DRA driver retrieved via cached template node info": {
337+
nodeGroupsAllNodes: map[string][]*apiv1.Node{
338+
"ng1": {
339+
buildTestNode("node_1", true),
340+
buildTestNode("node_2", true),
341+
},
342+
},
343+
nodeGroupsTemplatesSlices: map[string][]*resourceapi.ResourceSlice{},
344+
registryNodeInfos: map[string]*framework.NodeInfo{
345+
"ng1": framework.NewNodeInfo(
346+
buildTestNode("ng1_template", true),
347+
createNodeResourceSlices("ng1_template", []int{1}),
348+
),
349+
},
350+
nodesSlices: map[string][]*resourceapi.ResourceSlice{
351+
"node_1": createNodeResourceSlices("node_1", []int{1}),
352+
"node_2": {},
353+
},
354+
expectedNodesReadiness: map[string]bool{
355+
"node_1": true,
356+
"node_2": false,
357+
},
358+
},
309359
}
310360

311361
for tcName, tc := range testCases {
@@ -336,7 +386,11 @@ func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) {
336386
clusterSnapshotStore.SetClusterState([]*apiv1.Node{}, []*apiv1.Pod{}, draSnapshot)
337387
clusterSnapshot, _, _ := testsnapshot.NewCustomTestSnapshotAndHandle(clusterSnapshotStore)
338388

339-
autoscalingCtx := &ca_context.AutoscalingContext{CloudProvider: provider, ClusterSnapshot: clusterSnapshot}
389+
autoscalingCtx := &ca_context.AutoscalingContext{
390+
CloudProvider: provider,
391+
ClusterSnapshot: clusterSnapshot,
392+
TemplateNodeInfoRegistry: newMockTemplateNodeInfoRegistry(tc.registryNodeInfos),
393+
}
340394
processor := DraCustomResourcesProcessor{}
341395
newAllNodes, newReadyNodes := processor.FilterOutNodesWithUnreadyResources(autoscalingCtx, initialAllNodes, initialReadyNodes, draSnapshot)
342396

0 commit comments

Comments
 (0)