diff --git a/internal/scheduling/nova/plugins/filters/filter_external_customer.go b/internal/scheduling/nova/plugins/filters/filter_external_customer.go index 62c059b10..56f73c8ac 100644 --- a/internal/scheduling/nova/plugins/filters/filter_external_customer.go +++ b/internal/scheduling/nova/plugins/filters/filter_external_customer.go @@ -37,8 +37,8 @@ func (s *FilterExternalCustomerStep) Run(traceLog *slog.Logger, request api.Exte result := s.IncludeAllHostsFromRequest(request) domainName, err := request.Spec.Data.GetSchedulerHintStr("domain_name") if err != nil { - traceLog.Error("failed to get domain_name scheduler hint", "error", err) - return nil, err + traceLog.Error("failed to get domain_name scheduler hint, skipping filter", "error", err) + return result, nil } if slices.Contains(s.Options.CustomerIgnoredDomainNames, domainName) { traceLog.Info("domain is no external customer domain, skipping filter", "domain", domainName) diff --git a/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go b/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go index fb9971a83..7ca313dc3 100644 --- a/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go @@ -245,7 +245,7 @@ func TestFilterExternalCustomerStep_Run(t *testing.T) { filteredHosts: []string{"host3"}, }, { - name: "Missing domain_name in scheduler hints - error", + name: "Missing domain_name in scheduler hints - skips filter, all hosts pass", opts: FilterExternalCustomerStepOpts{ CustomerDomainNamePrefixes: []string{"ext-"}, }, @@ -257,12 +257,14 @@ func TestFilterExternalCustomerStep_Run(t *testing.T) { }, Hosts: []api.ExternalSchedulerHost{ {ComputeHost: "host1"}, + {ComputeHost: "host3"}, }, }, - expectError: true, + expectedHosts: []string{"host1", "host3"}, + filteredHosts: []string{}, }, { - name: "Nil scheduler hints - error", + name: "Nil scheduler hints - skips filter, all hosts pass", opts: FilterExternalCustomerStepOpts{ CustomerDomainNamePrefixes: []string{"ext-"}, }, @@ -274,9 +276,11 @@ func TestFilterExternalCustomerStep_Run(t *testing.T) { }, Hosts: []api.ExternalSchedulerHost{ {ComputeHost: "host1"}, + {ComputeHost: "host2"}, }, }, - expectError: true, + expectedHosts: []string{"host1", "host2"}, + filteredHosts: []string{}, }, { name: "Case sensitive prefix matching", diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index 198f1a28f..5e1f1dc3c 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -56,15 +56,14 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa return nil, err } for _, hv := range hvs.Items { - // This case would be caught below, but we want to log this explicitly. if hv.Status.EffectiveCapacity == nil { - traceLog.Warn("hypervisor with nil effective capacity, skipping", "host", hv.Name) - continue + traceLog.Warn("hypervisor with nil effective capacity, use capacity instead (overprovisioning not considered)", "host", hv.Name) + freeResourcesByHost[hv.Name] = hv.Status.Capacity + } else { + // Start with the total effective capacity which is capacity * overcommit ratio. + freeResourcesByHost[hv.Name] = hv.Status.EffectiveCapacity } - // Start with the total effective capacity which is capacity * overcommit ratio. - freeResourcesByHost[hv.Name] = hv.Status.EffectiveCapacity - // Subtract allocated resources. for resourceName, allocated := range hv.Status.Allocation { free, ok := freeResourcesByHost[hv.Name][resourceName] diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go index 4068cf900..452782484 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go @@ -51,6 +51,49 @@ func newHypervisor(name, cpuCap, cpuAlloc, memCap, memAlloc string) *hv1.Hypervi } } +// newHypervisorWithCapacityOnly creates a hypervisor with only Capacity set (no EffectiveCapacity). +func newHypervisorWithCapacityOnly(name, cpuCap, memCap string) *hv1.Hypervisor { + return &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Status: hv1.HypervisorStatus{ + Capacity: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse(cpuCap), + hv1.ResourceMemory: resource.MustParse(memCap), + }, + Allocation: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse("0"), + hv1.ResourceMemory: resource.MustParse("0"), + }, + }, + } +} + +// newHypervisorWithBothCapacities creates a hypervisor with both Capacity and EffectiveCapacity set. +// EffectiveCapacity is typically >= Capacity due to overcommit ratio. +func newHypervisorWithBothCapacities(name, cpuCap, cpuEffCap, memCap, memEffCap string) *hv1.Hypervisor { + return &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Status: hv1.HypervisorStatus{ + Capacity: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse(cpuCap), + hv1.ResourceMemory: resource.MustParse(memCap), + }, + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse(cpuEffCap), + hv1.ResourceMemory: resource.MustParse(memEffCap), + }, + Allocation: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: resource.MustParse("0"), + hv1.ResourceMemory: resource.MustParse("0"), + }, + }, + } +} + func newCommittedReservation( name, targetHost, observedHost, projectID, flavorName, flavorGroup, cpu, memory string, specAllocations map[string]v1alpha1.CommittedResourceAllocation, // Spec allocations for CR @@ -440,7 +483,7 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { t.Run(tt.name, func(t *testing.T) { objects := make([]client.Object, 0, len(hypervisors)+len(tt.reservations)) for _, h := range hypervisors { - objects = append(objects, h) + objects = append(objects, h.DeepCopy()) } for _, r := range tt.reservations { objects = append(objects, r) @@ -469,3 +512,87 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { }) } } + +func TestFilterHasEnoughCapacity_NilEffectiveCapacityFallback(t *testing.T) { + scheme := buildTestScheme(t) + + tests := []struct { + name string + hypervisors []*hv1.Hypervisor + request api.ExternalSchedulerRequest + expectedHosts []string + filteredHosts []string + }{ + { + name: "Hypervisor with nil EffectiveCapacity uses Capacity fallback", + hypervisors: []*hv1.Hypervisor{ + newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // has EffectiveCapacity: 8 CPU free, 16Gi free + newHypervisorWithCapacityOnly("host2", "8", "16Gi"), // nil EffectiveCapacity, uses Capacity: 8 CPU free, 16Gi free + newHypervisorWithCapacityOnly("host3", "2", "4Gi"), // nil EffectiveCapacity, uses Capacity: 2 CPU free (not enough) + newHypervisorWithCapacityOnly("host4", "16", "32Gi"), // nil EffectiveCapacity, uses Capacity: 16 CPU free, 32Gi free + }, + request: newNovaRequest("instance-123", "project-A", "m1.small", "gp-1", 4, "8Gi", false, []string{"host1", "host2", "host3", "host4"}), + expectedHosts: []string{"host1", "host2", "host4"}, + filteredHosts: []string{"host3"}, + }, + { + name: "All hypervisors with nil EffectiveCapacity use Capacity fallback", + hypervisors: []*hv1.Hypervisor{ + newHypervisorWithCapacityOnly("host1", "8", "16Gi"), + newHypervisorWithCapacityOnly("host2", "4", "8Gi"), + }, + request: newNovaRequest("instance-123", "project-A", "m1.small", "gp-1", 4, "8Gi", false, []string{"host1", "host2"}), + expectedHosts: []string{"host1", "host2"}, + filteredHosts: []string{}, + }, + { + name: "EffectiveCapacity used when both are set (overcommit scenario)", + hypervisors: []*hv1.Hypervisor{ + // host1: Capacity=8 CPU, EffectiveCapacity=16 CPU (2x overcommit) + // With Capacity only: 8 free -> passes + // With EffectiveCapacity: 16 free -> passes (more capacity available) + newHypervisorWithBothCapacities("host1", "8", "16", "16Gi", "32Gi"), + // host2: Capacity=4 CPU, EffectiveCapacity=8 CPU (2x overcommit) + // With Capacity only: 4 free -> would be filtered (need 5) + // With EffectiveCapacity: 8 free -> passes + newHypervisorWithBothCapacities("host2", "4", "8", "8Gi", "16Gi"), + // host3: Capacity=4 CPU, EffectiveCapacity=4 CPU (no overcommit) + // Both: 4 free -> filtered (need 5) + newHypervisorWithBothCapacities("host3", "4", "4", "8Gi", "8Gi"), + }, + request: newNovaRequest("instance-123", "project-A", "m1.small", "gp-1", 5, "8Gi", false, []string{"host1", "host2", "host3"}), + expectedHosts: []string{"host1", "host2"}, + filteredHosts: []string{"host3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := make([]client.Object, 0, len(tt.hypervisors)) + for _, h := range tt.hypervisors { + objects = append(objects, h.DeepCopy()) + } + + step := &FilterHasEnoughCapacity{} + step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + step.Options = FilterHasEnoughCapacityOpts{LockReserved: false} + + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + for _, host := range tt.expectedHosts { + if _, ok := result.Activations[host]; !ok { + t.Errorf("expected host %s to be present in activations, but got %+v", host, result.Activations) + } + } + + for _, host := range tt.filteredHosts { + if _, ok := result.Activations[host]; ok { + t.Errorf("expected host %s to be filtered out", host) + } + } + }) + } +}