Skip to content

Commit 8e84e9f

Browse files
committed
Add CSM for batch write flow control
1 parent f142db8 commit 8e84e9f

File tree

9 files changed

+406
-34
lines changed

9 files changed

+406
-34
lines changed

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/RateLimitingServerStreamingCallable.java

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
*/
1616
package com.google.cloud.bigtable.data.v2.stub;
1717

18+
import static com.google.cloud.bigtable.data.v2.stub.metrics.Util.extractStatus;
19+
1820
import com.google.api.gax.rpc.ApiCallContext;
1921
import com.google.api.gax.rpc.DeadlineExceededException;
2022
import com.google.api.gax.rpc.ResourceExhaustedException;
@@ -37,6 +39,7 @@
3739
import java.util.concurrent.atomic.AtomicReference;
3840
import java.util.logging.Logger;
3941
import javax.annotation.Nonnull;
42+
import javax.annotation.Nullable;
4043

4144
class RateLimitingServerStreamingCallable
4245
extends ServerStreamingCallable<MutateRowsRequest, MutateRowsResponse> {
@@ -69,6 +72,8 @@ class RateLimitingServerStreamingCallable
6972

7073
private final ServerStreamingCallable<MutateRowsRequest, MutateRowsResponse> innerCallable;
7174

75+
private BigtableTracer bigtableTracer;
76+
7277
RateLimitingServerStreamingCallable(
7378
@Nonnull ServerStreamingCallable<MutateRowsRequest, MutateRowsResponse> innerCallable) {
7479
this.limiter = new ConditionalRateLimiter(DEFAULT_QPS);
@@ -84,8 +89,8 @@ public void call(
8489
limiter.acquire();
8590
stopwatch.stop();
8691
if (context.getTracer() instanceof BigtableTracer) {
87-
((BigtableTracer) context.getTracer())
88-
.batchRequestThrottled(stopwatch.elapsed(TimeUnit.NANOSECONDS));
92+
bigtableTracer = (BigtableTracer) context.getTracer();
93+
bigtableTracer.batchRequestThrottled(stopwatch.elapsed(TimeUnit.NANOSECONDS));
8994
}
9095
RateLimitingResponseObserver innerObserver = new RateLimitingResponseObserver(responseObserver);
9196
innerCallable.call(request, innerObserver, context);
@@ -104,7 +109,10 @@ static class ConditionalRateLimiter {
104109

105110
public ConditionalRateLimiter(long defaultQps) {
106111
limiter = RateLimiter.create(defaultQps);
107-
logger.info("Rate limiting is initiated (but disabled) with rate of " + defaultQps + " QPS.");
112+
logger.info(
113+
"Batch write flow control: rate limiter is initiated (but disabled) with rate of "
114+
+ defaultQps
115+
+ " QPS.");
108116
}
109117

110118
/**
@@ -128,7 +136,7 @@ public void tryDisable() {
128136
if (now.isAfter(nextTime)) {
129137
boolean wasEnabled = this.enabled.getAndSet(false);
130138
if (wasEnabled) {
131-
logger.info("Rate limiter is disabled.");
139+
logger.info("Batch write flow control: rate limiter is disabled.");
132140
}
133141
// No need to update nextRateUpdateTime, any new RateLimitInfo can enable rate limiting and
134142
// update the rate again.
@@ -139,7 +147,7 @@ public void tryDisable() {
139147
public void enable() {
140148
boolean wasEnabled = this.enabled.getAndSet(true);
141149
if (!wasEnabled) {
142-
logger.info("Rate limiter is enabled.");
150+
logger.info("Batch write flow control: rate limiter is enabled.");
143151
}
144152
}
145153

@@ -158,31 +166,52 @@ public double getRate() {
158166
* @param rate The new rate of the rate limiter.
159167
* @param period The period during which rate should not be updated again and the rate limiter
160168
* should not be disabled.
169+
* @param bigtableTracer The tracer for exporting client-side metrics.
170+
* @param factor The capped factor that we're trying to apply.
171+
* @param status The status of the response from which the factor is retrieved or derived.
161172
*/
162-
public void trySetRate(double rate, Duration period) {
173+
public void trySetRate(
174+
double rate,
175+
Duration period,
176+
@Nullable BigtableTracer bigtableTracer,
177+
double factor,
178+
@Nullable Throwable status) {
163179
Instant nextTime = nextRateUpdateTime.get();
164180
Instant now = Instant.now();
165181

166182
if (now.isBefore(nextTime)) {
183+
if (bigtableTracer != null) {
184+
bigtableTracer.addBatchWriteFlowControlFactor(factor, status, false);
185+
}
167186
return;
168187
}
169188

170189
Instant newNextTime = now.plusSeconds(period.getSeconds());
171190

172191
if (!nextRateUpdateTime.compareAndSet(nextTime, newNextTime)) {
173192
// Someone else updated it already.
193+
if (bigtableTracer != null) {
194+
bigtableTracer.addBatchWriteFlowControlFactor(factor, status, false);
195+
}
174196
return;
175197
}
176198
final double oldRate = limiter.getRate();
177199
limiter.setRate(rate);
178200
logger.info(
179-
"Updated max rate from "
201+
"Batch write flow control: updated max rate from "
180202
+ oldRate
181203
+ " to "
182204
+ rate
205+
+ " applied factor "
206+
+ factor
183207
+ " with period "
184208
+ period.getSeconds()
185-
+ " seconds.");
209+
+ " seconds. Status="
210+
+ extractStatus(status));
211+
if (bigtableTracer != null) {
212+
bigtableTracer.setBatchWriteFlowControlTargetQps(rate);
213+
bigtableTracer.addBatchWriteFlowControlFactor(factor, status, true);
214+
}
186215
}
187216

188217
@VisibleForTesting
@@ -215,17 +244,21 @@ private boolean hasValidRateLimitInfo(MutateRowsResponse response) {
215244
// have presence even thought it's marked as "optional". Check the factor and
216245
// period to make sure they're not 0.
217246
if (!response.hasRateLimitInfo()) {
218-
logger.finest("Response carries no RateLimitInfo");
247+
logger.finest("Batch write flow control: response carries no RateLimitInfo");
219248
return false;
220249
}
221250

222251
if (response.getRateLimitInfo().getFactor() <= 0
223252
|| response.getRateLimitInfo().getPeriod().getSeconds() <= 0) {
224-
logger.finest("Response carries invalid RateLimitInfo=" + response.getRateLimitInfo());
253+
logger.finest(
254+
"Batch write flow control: response carries invalid RateLimitInfo="
255+
+ response.getRateLimitInfo());
225256
return false;
226257
}
227258

228-
logger.finest("Response carries valid RateLimitInfo=" + response.getRateLimitInfo());
259+
logger.finest(
260+
"Batch write flow control: response carries valid RateLimitInfo="
261+
+ response.getRateLimitInfo());
229262
return true;
230263
}
231264

@@ -236,7 +269,8 @@ protected void onResponseImpl(MutateRowsResponse response) {
236269
RateLimitInfo info = response.getRateLimitInfo();
237270
updateQps(
238271
info.getFactor(),
239-
Duration.ofSeconds(com.google.protobuf.util.Durations.toSeconds(info.getPeriod())));
272+
Duration.ofSeconds(com.google.protobuf.util.Durations.toSeconds(info.getPeriod())),
273+
null);
240274
} else {
241275
limiter.tryDisable();
242276
}
@@ -250,7 +284,7 @@ protected void onErrorImpl(Throwable t) {
250284
if (t instanceof DeadlineExceededException
251285
|| t instanceof UnavailableException
252286
|| t instanceof ResourceExhaustedException) {
253-
updateQps(MIN_FACTOR, DEFAULT_PERIOD);
287+
updateQps(MIN_FACTOR, DEFAULT_PERIOD, t);
254288
}
255289
outerObserver.onError(t);
256290
}
@@ -260,11 +294,11 @@ protected void onCompleteImpl() {
260294
outerObserver.onComplete();
261295
}
262296

263-
private void updateQps(double factor, Duration period) {
297+
private void updateQps(double factor, Duration period, @Nullable Throwable status) {
264298
double cappedFactor = Math.min(Math.max(factor, MIN_FACTOR), MAX_FACTOR);
265299
double currentRate = limiter.getRate();
266300
double cappedRate = Math.min(Math.max(currentRate * cappedFactor, MIN_QPS), MAX_QPS);
267-
limiter.trySetRate(cappedRate, period);
301+
limiter.trySetRate(cappedRate, period, bigtableTracer, cappedFactor, status);
268302
}
269303
}
270304

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracer.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

1818
import com.google.api.core.BetaApi;
19+
import com.google.api.core.InternalApi;
1920
import com.google.api.gax.rpc.ApiCallContext;
2021
import com.google.api.gax.tracing.ApiTracer;
2122
import com.google.api.gax.tracing.BaseApiTracer;
@@ -115,4 +116,25 @@ public void grpcMessageSent() {
115116
public void setTotalTimeoutDuration(Duration totalTimeoutDuration) {
116117
// noop
117118
}
119+
120+
/**
121+
* Record the target QPS for batch write flow control.
122+
*
123+
* @param targetQps The new target QPS for the client.
124+
*/
125+
@InternalApi
126+
public void setBatchWriteFlowControlTargetQps(double targetQps) {}
127+
128+
/**
129+
* Record the factors received from server-side for batch write flow control. The factors are
130+
* capped by min and max allowed factor values. Status and whether the factor was actually applied
131+
* are also recorded.
132+
*
133+
* @param factor Capped factor from server-side. For non-OK response, min factor is used.
134+
* @param status The status of the response from which the factor is retrieved or derived.
135+
* @param applied Whether the factor was actually applied.
136+
*/
137+
@InternalApi
138+
public void addBatchWriteFlowControlFactor(
139+
double factor, @Nullable Throwable status, boolean applied) {}
118140
}

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsConstants.java

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,11 @@
2020
import com.google.common.collect.ImmutableMap;
2121
import com.google.common.collect.ImmutableSet;
2222
import io.opentelemetry.api.common.AttributeKey;
23-
import io.opentelemetry.sdk.metrics.Aggregation;
24-
import io.opentelemetry.sdk.metrics.InstrumentSelector;
25-
import io.opentelemetry.sdk.metrics.InstrumentType;
26-
import io.opentelemetry.sdk.metrics.View;
23+
import io.opentelemetry.sdk.metrics.*;
2724
import java.util.Map;
2825
import java.util.Set;
2926
import java.util.stream.Collectors;
27+
import javax.annotation.Nullable;
3028

3129
/** Defining Bigtable builit-in metrics scope, attributes, metric names and views. */
3230
@InternalApi
@@ -49,6 +47,7 @@ public class BuiltinMetricsConstants {
4947
static final AttributeKey<String> METHOD_KEY = AttributeKey.stringKey("method");
5048
static final AttributeKey<String> STATUS_KEY = AttributeKey.stringKey("status");
5149
static final AttributeKey<String> CLIENT_UID_KEY = AttributeKey.stringKey("client_uid");
50+
static final AttributeKey<Boolean> APPLIED_KEY = AttributeKey.booleanKey("applied");
5251

5352
static final AttributeKey<String> TRANSPORT_TYPE = AttributeKey.stringKey("transport_type");
5453
static final AttributeKey<String> TRANSPORT_REGION = AttributeKey.stringKey("transport_region");
@@ -95,6 +94,9 @@ public class BuiltinMetricsConstants {
9594
static final String CLIENT_BLOCKING_LATENCIES_NAME = "throttling_latencies";
9695
static final String PER_CONNECTION_ERROR_COUNT_NAME = "per_connection_error_count";
9796
static final String OUTSTANDING_RPCS_PER_CHANNEL_NAME = "connection_pool/outstanding_rpcs";
97+
static final String BATCH_WRITE_FLOW_CONTROL_TARGET_QPS_NAME =
98+
"batch_write_flow_control_target_qps";
99+
static final String BATCH_WRITE_FLOW_CONTROL_FACTOR_NAME = "batch_write_flow_control_factor";
98100

99101
// Start allow list of metrics that will be exported as internal
100102
public static final Map<String, Set<String>> GRPC_METRICS =
@@ -210,6 +212,8 @@ public class BuiltinMetricsConstants {
210212
70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 130.0,
211213
135.0, 140.0, 145.0, 150.0, 155.0, 160.0, 165.0, 170.0, 175.0, 180.0, 185.0, 190.0,
212214
195.0, 200.0));
215+
private static final Aggregation AGGREGATION_BATCH_WRITE_FLOW_CONTROL_FACTOR_HISTOGRAM =
216+
Aggregation.explicitBucketHistogram(ImmutableList.of(0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3));
213217

214218
static final Set<AttributeKey> COMMON_ATTRIBUTES =
215219
ImmutableSet.of(
@@ -225,7 +229,7 @@ public class BuiltinMetricsConstants {
225229
static void defineView(
226230
ImmutableMap.Builder<InstrumentSelector, View> viewMap,
227231
String id,
228-
Aggregation aggregation,
232+
@Nullable Aggregation aggregation,
229233
InstrumentType type,
230234
String unit,
231235
Set<AttributeKey> attributes) {
@@ -242,14 +246,12 @@ static void defineView(
242246
COMMON_ATTRIBUTES.stream().map(AttributeKey::getKey).collect(Collectors.toSet()))
243247
.addAll(attributes.stream().map(AttributeKey::getKey).collect(Collectors.toSet()))
244248
.build();
245-
View view =
246-
View.builder()
247-
.setName(METER_NAME + id)
248-
.setAggregation(aggregation)
249-
.setAttributeFilter(attributesFilter)
250-
.build();
251-
252-
viewMap.put(selector, view);
249+
ViewBuilder viewBuilder =
250+
View.builder().setName(METER_NAME + id).setAttributeFilter(attributesFilter);
251+
if (aggregation != null) {
252+
viewBuilder.setAggregation(aggregation);
253+
}
254+
viewMap.put(selector, viewBuilder.build());
253255
}
254256

255257
// uses cloud.BigtableClient schema
@@ -367,7 +369,23 @@ public static Map<InstrumentSelector, View> getAllViews() {
367369
.addAll(COMMON_ATTRIBUTES)
368370
.add(STREAMING_KEY, STATUS_KEY)
369371
.build());
370-
372+
defineView(
373+
views,
374+
BATCH_WRITE_FLOW_CONTROL_TARGET_QPS_NAME,
375+
null,
376+
InstrumentType.GAUGE,
377+
"1",
378+
ImmutableSet.<AttributeKey>builder().addAll(COMMON_ATTRIBUTES).build());
379+
defineView(
380+
views,
381+
BATCH_WRITE_FLOW_CONTROL_FACTOR_NAME,
382+
AGGREGATION_BATCH_WRITE_FLOW_CONTROL_FACTOR_HISTOGRAM,
383+
InstrumentType.HISTOGRAM,
384+
"1",
385+
ImmutableSet.<AttributeKey>builder()
386+
.addAll(COMMON_ATTRIBUTES)
387+
.add(STATUS_KEY, APPLIED_KEY)
388+
.build());
371389
return views.build();
372390
}
373391
}

0 commit comments

Comments
 (0)