Skip to content

Commit 8a86e29

Browse files
author
Yuepeng Pan
committed
[FLINK-33653][runtime] Introduce a benchmark for balanced tasks scheduling.
1 parent 6810df8 commit 8a86e29

6 files changed

Lines changed: 599 additions & 0 deletions

File tree

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.flink.scheduler.benchmark.slot.matching.resolver;
20+
21+
import org.apache.flink.configuration.TaskManagerOptions.TaskManagerLoadBalanceMode;
22+
import org.apache.flink.runtime.clusterframework.types.AllocationID;
23+
import org.apache.flink.runtime.clusterframework.types.ResourceID;
24+
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
25+
import org.apache.flink.runtime.jobgraph.JobVertexID;
26+
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
27+
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot;
28+
import org.apache.flink.runtime.scheduler.adaptive.allocator.SimpleSlotMatchingResolver;
29+
import org.apache.flink.runtime.scheduler.adaptive.allocator.SlotMatchingResolver;
30+
import org.apache.flink.runtime.scheduler.adaptive.allocator.SlotSharingSlotAllocator.ExecutionSlotSharingGroup;
31+
import org.apache.flink.runtime.scheduler.adaptive.allocator.SlotsBalancedSlotMatchingResolver;
32+
import org.apache.flink.runtime.scheduler.adaptive.allocator.TasksBalancedSlotMatchingResolver;
33+
import org.apache.flink.runtime.scheduler.adaptive.allocator.TestingSlot;
34+
import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
35+
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
36+
import org.apache.flink.scheduler.benchmark.SchedulerBenchmarkExecutorBase;
37+
38+
import org.openjdk.jmh.annotations.Benchmark;
39+
import org.openjdk.jmh.annotations.BenchmarkMode;
40+
import org.openjdk.jmh.annotations.Level;
41+
import org.openjdk.jmh.annotations.Mode;
42+
import org.openjdk.jmh.annotations.Param;
43+
import org.openjdk.jmh.annotations.Setup;
44+
import org.openjdk.jmh.infra.Blackhole;
45+
import org.openjdk.jmh.runner.RunnerException;
46+
47+
import java.net.InetAddress;
48+
import java.net.UnknownHostException;
49+
import java.util.ArrayList;
50+
import java.util.Collection;
51+
import java.util.HashSet;
52+
import java.util.List;
53+
import java.util.Set;
54+
55+
/** The executor to drive {@link SlotMatchingResolver}. */
56+
public class SlotMatchingResolverBenchmarkExecutor extends SchedulerBenchmarkExecutorBase {
57+
58+
/**
59+
* The number of slots per task manager is deliberately much smaller than the number of
60+
* task managers, to simulate a production environment as closely as possible.
61+
*/
62+
public static final int SLOTS_PER_TASKS_MANAGER = 8;
63+
public static final int TASK_MANAGERS = 128;
64+
65+
private static final int requestedSlotSharingGroups = 3;
66+
private static final List<SlotSharingGroup> slotSharingGroups = new ArrayList<>();
67+
private static final Collection<ExecutionSlotSharingGroup> requestGroups = new ArrayList<>();
68+
private static final Collection<PhysicalSlot> slots = new ArrayList<>();
69+
70+
static {
71+
// For ResourceProfile.UNKNOWN.
72+
slotSharingGroups.add(new SlotSharingGroup());
73+
// For other resource profiles.
74+
for (int i = 1; i < requestedSlotSharingGroups; i++) {
75+
SlotSharingGroup sharingGroup = new SlotSharingGroup();
76+
sharingGroup.setResourceProfile(newGrainfinedResourceProfile(i));
77+
slotSharingGroups.add(sharingGroup);
78+
}
79+
// For requested groups and slots.
80+
for (int tmIndex = 0; tmIndex < TASK_MANAGERS; tmIndex++) {
81+
82+
TaskManagerLocation tml = getTaskManagerLocation(tmIndex + 1);
83+
84+
for (int slotIndex = 0; slotIndex < SLOTS_PER_TASKS_MANAGER; slotIndex++) {
85+
ResourceProfile profile = newGrainfinedResourceProfile(slotIndex);
86+
87+
slots.add(new TestingSlot(new AllocationID(), profile, tml));
88+
requestGroups.add(getExecutionSlotSharingGroup(slotIndex + 1, slotIndex));
89+
}
90+
}
91+
}
92+
93+
private static ExecutionSlotSharingGroup getExecutionSlotSharingGroup(
94+
int loading, int slotIndex) {
95+
Set<ExecutionVertexID> executionVertexIDSet = new HashSet<>();
96+
JobVertexID jobVertexID = new JobVertexID();
97+
for (int i = 0; i < loading; i++) {
98+
executionVertexIDSet.add(new ExecutionVertexID(jobVertexID, i));
99+
}
100+
return new ExecutionSlotSharingGroup(
101+
slotSharingGroups.get(slotIndex % 3), executionVertexIDSet);
102+
}
103+
104+
public static TaskManagerLocation getTaskManagerLocation(int dataPort) {
105+
try {
106+
InetAddress inetAddress = InetAddress.getByName("1.2.3.4");
107+
return new TaskManagerLocation(ResourceID.generate(), inetAddress, dataPort);
108+
} catch (UnknownHostException e) {
109+
throw new RuntimeException(e);
110+
}
111+
}
112+
113+
public static ResourceProfile newGrainfinedResourceProfile(int slotIndex) {
114+
return ResourceProfile.newBuilder()
115+
.setCpuCores(slotIndex % 2 == 0 ? 1 : 2)
116+
.setTaskHeapMemoryMB(100)
117+
.setTaskOffHeapMemoryMB(100)
118+
.setManagedMemoryMB(100)
119+
.build();
120+
}
121+
122+
@Param({"NONE", "SLOTS", "TASKS"})
123+
private TaskManagerLoadBalanceMode taskManagerLoadBalanceMode;
124+
125+
private SlotMatchingResolver slotMatchingResolver;
126+
127+
/** Command-line entry point: passes this executor class to {@code runBenchmark}. */
public static void main(String[] args) throws RunnerException {
128+
runBenchmark(SlotMatchingResolverBenchmarkExecutor.class);
129+
}
130+
131+
/**
 * Trial-level setup: stores the {@link SlotMatchingResolver} selected for the current
 * {@code taskManagerLoadBalanceMode} parameter.
 */
@Setup(Level.Trial)
132+
public void setup() throws Exception {
133+
slotMatchingResolver = getSlotMatchingResolver();
134+
}
135+
136+
/**
 * Single-shot benchmark: matches the prepared request groups against the prepared slots with
 * the configured resolver and hands the result to the blackhole.
 *
 * @param blackhole JMH blackhole that consumes the matching result
 */
@Benchmark
137+
@BenchmarkMode(Mode.SingleShotTime)
138+
public void runSlotsMatching(Blackhole blackhole) {
139+
blackhole.consume(
140+
slotMatchingResolver.matchSlotSharingGroupWithSlots(requestGroups, slots));
141+
}
142+
143+
private SlotMatchingResolver getSlotMatchingResolver() {
144+
switch (taskManagerLoadBalanceMode) {
145+
case NONE:
146+
this.slotMatchingResolver = SimpleSlotMatchingResolver.INSTANCE;
147+
break;
148+
case SLOTS:
149+
this.slotMatchingResolver =
150+
SlotsBalancedSlotMatchingResolver.INSTANCE;
151+
break;
152+
case TASKS:
153+
this.slotMatchingResolver =
154+
TasksBalancedSlotMatchingResolver.INSTANCE;
155+
break;
156+
default:
157+
throw new UnsupportedOperationException(
158+
String.format(
159+
"Unsupported task manager load balance mode '%s' in %s",
160+
taskManagerLoadBalanceMode,
161+
getClass().getName()));
162+
}
163+
return slotMatchingResolver;
164+
}
165+
}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.flink.scheduler.benchmark.slot.matching.strategy;
20+
21+
import org.apache.flink.configuration.TaskManagerOptions.TaskManagerLoadBalanceMode;
22+
import org.apache.flink.runtime.clusterframework.types.AllocationID;
23+
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
24+
import org.apache.flink.runtime.jobmaster.SlotRequestId;
25+
import org.apache.flink.runtime.jobmaster.slotpool.PendingRequest;
26+
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot;
27+
import org.apache.flink.runtime.jobmaster.slotpool.RequestSlotMatchingStrategy;
28+
import org.apache.flink.runtime.jobmaster.slotpool.SimpleRequestSlotMatchingStrategy;
29+
import org.apache.flink.runtime.jobmaster.slotpool.TasksBalancedRequestSlotMatchingStrategy;
30+
import org.apache.flink.runtime.scheduler.adaptive.allocator.TestingSlot;
31+
import org.apache.flink.runtime.scheduler.loading.DefaultLoadingWeight;
32+
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
33+
import org.apache.flink.scheduler.benchmark.SchedulerBenchmarkExecutorBase;
34+
35+
import org.openjdk.jmh.annotations.Benchmark;
36+
import org.openjdk.jmh.annotations.BenchmarkMode;
37+
import org.openjdk.jmh.annotations.Level;
38+
import org.openjdk.jmh.annotations.Mode;
39+
import org.openjdk.jmh.annotations.Param;
40+
import org.openjdk.jmh.annotations.Setup;
41+
import org.openjdk.jmh.infra.Blackhole;
42+
import org.openjdk.jmh.runner.RunnerException;
43+
44+
import java.util.ArrayList;
45+
import java.util.Collection;
46+
import java.util.Collections;
47+
import java.util.HashMap;
48+
49+
import static org.apache.flink.scheduler.benchmark.slot.matching.resolver.SlotMatchingResolverBenchmarkExecutor.SLOTS_PER_TASKS_MANAGER;
50+
import static org.apache.flink.scheduler.benchmark.slot.matching.resolver.SlotMatchingResolverBenchmarkExecutor.TASK_MANAGERS;
51+
import static org.apache.flink.scheduler.benchmark.slot.matching.resolver.SlotMatchingResolverBenchmarkExecutor.getTaskManagerLocation;
52+
import static org.apache.flink.scheduler.benchmark.slot.matching.resolver.SlotMatchingResolverBenchmarkExecutor.newGrainfinedResourceProfile;
53+
54+
/** The executor to drive {@link RequestSlotMatchingStrategy}. */
55+
public class RequestSlotMatchingStrategyBenchmarkExecutor
56+
extends SchedulerBenchmarkExecutorBase {
57+
58+
private static final Collection<PhysicalSlot> slots = new ArrayList<>();
59+
private static final Collection<PendingRequest> slotRequests = new ArrayList<>();
60+
61+
static {
62+
// For requested groups and slots.
63+
for (int tmIndex = 0; tmIndex < TASK_MANAGERS; tmIndex++) {
64+
65+
TaskManagerLocation tml = getTaskManagerLocation(tmIndex + 1);
66+
67+
for (int slotIndex = 0; slotIndex < SLOTS_PER_TASKS_MANAGER; slotIndex++) {
68+
ResourceProfile profile = newGrainfinedResourceProfile(slotIndex);
69+
70+
slots.add(new TestingSlot(new AllocationID(), profile, tml));
71+
slotRequests.add(getPendingRequest(slotIndex + 1, slotIndex));
72+
}
73+
}
74+
}
75+
76+
private static PendingRequest getPendingRequest(float loading, int slotIndex) {
77+
return PendingRequest.createNormalRequest(
78+
new SlotRequestId(),
79+
newGrainfinedResourceProfile(slotIndex),
80+
new DefaultLoadingWeight(loading),
81+
Collections.emptyList());
82+
}
83+
84+
@Param({"NONE", "TASKS"})
85+
private TaskManagerLoadBalanceMode taskManagerLoadBalanceMode;
86+
87+
private RequestSlotMatchingStrategy requestSlotMatchingStrategy;
88+
89+
/** Command-line entry point: passes this executor class to {@code runBenchmark}. */
public static void main(String[] args) throws RunnerException {
90+
runBenchmark(RequestSlotMatchingStrategyBenchmarkExecutor.class);
91+
}
92+
93+
/**
 * Trial-level setup: stores the {@link RequestSlotMatchingStrategy} selected for the current
 * {@code taskManagerLoadBalanceMode} parameter.
 */
@Setup(Level.Trial)
94+
public void setup() throws Exception {
95+
requestSlotMatchingStrategy = getRequestSlotMatchingStrategy();
96+
}
97+
98+
/**
 * Single-shot benchmark: matches the prepared pending requests against the prepared slots
 * with the configured strategy, passing a fresh empty map as the third argument, and hands
 * the result to the blackhole.
 *
 * @param blackhole JMH blackhole that consumes the matching result
 */
@Benchmark
99+
@BenchmarkMode(Mode.SingleShotTime)
100+
public void runSlotsMatching(Blackhole blackhole) {
101+
blackhole.consume(
102+
requestSlotMatchingStrategy.matchRequestsAndSlots(
103+
slots, slotRequests, new HashMap<>()));
104+
}
105+
106+
private RequestSlotMatchingStrategy getRequestSlotMatchingStrategy() {
107+
switch (taskManagerLoadBalanceMode) {
108+
case TASKS:
109+
this.requestSlotMatchingStrategy =
110+
TasksBalancedRequestSlotMatchingStrategy.INSTANCE;
111+
break;
112+
case NONE:
113+
this.requestSlotMatchingStrategy = SimpleRequestSlotMatchingStrategy.INSTANCE;
114+
break;
115+
default:
116+
throw new UnsupportedOperationException(
117+
String.format(
118+
"Unsupported task manager load balance mode '%s' in %s",
119+
taskManagerLoadBalanceMode,
120+
getClass().getName()));
121+
}
122+
return requestSlotMatchingStrategy;
123+
}
124+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package org.apache.flink.scheduler.benchmark.slot.sharing.resolver;
2+
3+
import org.apache.flink.configuration.SchedulerExecutionMode;
4+
import org.apache.flink.configuration.TaskManagerOptions.TaskManagerLoadBalanceMode;
5+
import org.apache.flink.runtime.JobException;
6+
import org.apache.flink.runtime.client.JobExecutionException;
7+
import org.apache.flink.runtime.executiongraph.AccessExecutionJobVertex;
8+
import org.apache.flink.runtime.executiongraph.ExecutionGraph;
9+
import org.apache.flink.runtime.executiongraph.TestingDefaultExecutionGraphBuilder;
10+
import org.apache.flink.runtime.jobgraph.JobGraph;
11+
import org.apache.flink.runtime.jobgraph.JobGraphBuilder;
12+
import org.apache.flink.runtime.jobgraph.JobVertex;
13+
import org.apache.flink.runtime.scheduler.SchedulerBase;
14+
import org.apache.flink.runtime.scheduler.VertexParallelismStore;
15+
import org.apache.flink.runtime.scheduler.adaptive.JobGraphJobInformation;
16+
import org.apache.flink.runtime.scheduler.adaptive.allocator.DefaultSlotSharingResolver;
17+
import org.apache.flink.runtime.scheduler.adaptive.allocator.JobInformation;
18+
import org.apache.flink.runtime.scheduler.adaptive.allocator.SlotSharingResolver;
19+
import org.apache.flink.runtime.scheduler.adaptive.allocator.SlotSharingSlotAllocator;
20+
import org.apache.flink.runtime.scheduler.adaptive.allocator.TaskBalancedSlotSharingResolver;
21+
import org.apache.flink.runtime.scheduler.adaptive.allocator.VertexParallelism;
22+
import org.apache.flink.runtime.testutils.DirectScheduledExecutorService;
23+
24+
import java.util.Collection;
25+
import java.util.stream.Collectors;
26+
27+
import static org.apache.flink.runtime.scheduler.SchedulerBase.computeVertexParallelismStore;
28+
29+
/** The benchmark for resolving execution slot sharing groups with a {@link SlotSharingResolver}. */
30+
public class SlotSharingResolverBenchmark {
31+
32+
private final JobInformation jobInformation;
33+
private final VertexParallelism vertexParallelism;
34+
private final TaskManagerLoadBalanceMode taskManagerLoadBalanceMode;
35+
36+
/**
 * Prepares the inputs of the slot sharing resolver: builds a streaming job graph from the
 * given vertices, derives the job information from it, and records the parallelism of every
 * job vertex of the corresponding execution graph.
 *
 * @param taskManagerLoadBalanceMode mode that later selects the resolver implementation
 * @param vertices job vertices that make up the job graph
 * @throws RuntimeException wrapping any {@link JobException} or {@link JobExecutionException}
 *     raised while preparing the inputs
 */
public SlotSharingResolverBenchmark(
        TaskManagerLoadBalanceMode taskManagerLoadBalanceMode, Collection<JobVertex> vertices) {
    this.taskManagerLoadBalanceMode = taskManagerLoadBalanceMode;

    final JobGraph streamingJobGraph =
            JobGraphBuilder.newStreamingJobGraphBuilder().addJobVertices(vertices).build();
    try {
        final ExecutionGraph builtGraph =
                TestingDefaultExecutionGraphBuilder.newBuilder()
                        .setJobGraph(streamingJobGraph)
                        .build(new DirectScheduledExecutorService());

        final VertexParallelismStore parallelismStore =
                computeVertexParallelismStore(streamingJobGraph);
        this.jobInformation = new JobGraphJobInformation(streamingJobGraph, parallelismStore);

        // Map each job vertex id to the parallelism reported by the execution graph.
        this.vertexParallelism =
                new VertexParallelism(
                        builtGraph.getAllVertices().values().stream()
                                .collect(
                                        Collectors.toMap(
                                                AccessExecutionJobVertex::getJobVertexId,
                                                AccessExecutionJobVertex::getParallelism)));
    } catch (JobException | JobExecutionException e) {
        throw new RuntimeException(e);
    }
}
58+
59+
public Collection<SlotSharingSlotAllocator.ExecutionSlotSharingGroup> invokeSlotSharingResolver() {
60+
SlotSharingResolver slotSharingResolver = createSlotSharingResolver();
61+
return slotSharingResolver.getExecutionSlotSharingGroups(jobInformation, vertexParallelism);
62+
}
63+
64+
private SlotSharingResolver createSlotSharingResolver() {
65+
switch (taskManagerLoadBalanceMode) {
66+
case NONE:
67+
return DefaultSlotSharingResolver.INSTANCE;
68+
case TASKS:
69+
return TaskBalancedSlotSharingResolver.INSTANCE;
70+
default:
71+
throw new UnsupportedOperationException(
72+
String.format(
73+
"Unsupported task manager load balance mode '%s' in %s",
74+
taskManagerLoadBalanceMode,
75+
getClass().getName()));
76+
}
77+
}
78+
}

0 commit comments

Comments
 (0)