diff --git a/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go b/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go index 38d3b68d1..638df91da 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go +++ b/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go @@ -154,20 +154,30 @@ func (k *KVMResourceCapacityKPI) Collect(ch chan<- prometheus.Metric) { } for _, hypervisor := range hvs.Items { - cpuTotal, hasCPUTotal := hypervisor.Status.Capacity["cpu"] - ramTotal, hasRAMTotal := hypervisor.Status.Capacity["memory"] + if hypervisor.Status.EffectiveCapacity == nil { + slog.Warn("hypervisor with nil effective capacity, skipping", "host", hypervisor.Name) + continue + } + + cpuTotal, hasCPUTotal := hypervisor.Status.EffectiveCapacity[hv1.ResourceCPU] + ramTotal, hasRAMTotal := hypervisor.Status.EffectiveCapacity[hv1.ResourceMemory] if !hasCPUTotal || !hasRAMTotal { slog.Error("hypervisor missing cpu or ram total capacity", "hypervisor", hypervisor.Name) continue } - cpuUsed, hasCPUUtilized := hypervisor.Status.Allocation["cpu"] + if cpuTotal.IsZero() || ramTotal.IsZero() { + slog.Warn("hypervisor with zero cpu or ram total capacity, skipping", "host", hypervisor.Name) + continue + } + + cpuUsed, hasCPUUtilized := hypervisor.Status.Allocation[hv1.ResourceCPU] if !hasCPUUtilized { cpuUsed = resource.MustParse("0") } - ramUsed, hasRAMUtilized := hypervisor.Status.Allocation["memory"] + ramUsed, hasRAMUtilized := hypervisor.Status.Allocation[hv1.ResourceMemory] if !hasRAMUtilized { ramUsed = resource.MustParse("0") } diff --git a/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm_test.go b/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm_test.go index d0f3b1780..bb2e5f91a 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm_test.go +++ b/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm_test.go @@ -47,6 +47,55 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { hypervisors []hv1.Hypervisor expectedMetrics map[string][]expectedMetric // metric_name -> []expectedMetric }{ + { + name: "single hypervisor with nil effective capacity", + hypervisors: []hv1.Hypervisor{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "node001-bb088", + Labels: map[string]string{ + "topology.kubernetes.io/zone": "qa-1a", + }, + }, + Status: hv1.HypervisorStatus{ + EffectiveCapacity: nil, // Simulate nil effective capacity + Allocation: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse("64"), + hv1.ResourceMemory: resource.MustParse("256Gi"), + }, + Traits: []string{}, + }, + }, + }, + // No metrics should be emitted for this hypervisor since effective capacity is nil + expectedMetrics: map[string][]expectedMetric{}, + }, + { + name: "single hypervisor with zero total capacity", + hypervisors: []hv1.Hypervisor{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "node001-bb088", + Labels: map[string]string{ + "topology.kubernetes.io/zone": "qa-1a", + }, + }, + Status: hv1.HypervisorStatus{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse("0"), // Simulate zero CPU capacity + hv1.ResourceMemory: resource.MustParse("0"), // Simulate zero RAM capacity + }, + Allocation: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse("0"), + hv1.ResourceMemory: resource.MustParse("0"), + }, + Traits: []string{}, + }, + }, + }, + // No metrics should be emitted for this hypervisor since total capacity is zero + expectedMetrics: map[string][]expectedMetric{}, + }, { name: "single hypervisor with default traits", hypervisors: []hv1.Hypervisor{ @@ -58,7 +107,7 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { }, }, Status: hv1.HypervisorStatus{ - Capacity: map[hv1.ResourceName]resource.Quantity{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceCPU: resource.MustParse("128"), hv1.ResourceMemory: resource.MustParse("512Gi"), }, @@ -148,7 +197,7 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { }, }, Status: hv1.HypervisorStatus{ - Capacity: map[hv1.ResourceName]resource.Quantity{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceCPU: resource.MustParse("256"), hv1.ResourceMemory: resource.MustParse("1Ti"), }, @@ -209,7 +258,7 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { }, }, Status: hv1.HypervisorStatus{ - Capacity: map[hv1.ResourceName]resource.Quantity{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceCPU: resource.MustParse("64"), hv1.ResourceMemory: resource.MustParse("256Gi"), }, @@ -255,7 +304,7 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { }, }, Status: hv1.HypervisorStatus{ - Capacity: map[hv1.ResourceName]resource.Quantity{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceCPU: resource.MustParse("100"), hv1.ResourceMemory: resource.MustParse("200Gi"), }, @@ -274,7 +323,7 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { }, }, Status: hv1.HypervisorStatus{ - Capacity: map[hv1.ResourceName]resource.Quantity{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceCPU: resource.MustParse("200"), hv1.ResourceMemory: resource.MustParse("400Gi"), }, @@ -332,7 +381,7 @@ func TestKVMResourceCapacityKPI_Collect(t *testing.T) { }, }, Status: hv1.HypervisorStatus{ - Capacity: map[hv1.ResourceName]resource.Quantity{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceCPU: resource.MustParse("96"), hv1.ResourceMemory: resource.MustParse("384Gi"), },