Skip to content

Commit 5f07efc

Browse files
committed
Add CSM for batch write flow control
1 parent f142db8 commit 5f07efc

File tree

8 files changed

+204
-53
lines changed

8 files changed

+204
-53
lines changed

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/RateLimitingServerStreamingCallable.java

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
*/
1616
package com.google.cloud.bigtable.data.v2.stub;
1717

18+
import static com.google.cloud.bigtable.data.v2.stub.metrics.Util.extractStatus;
19+
1820
import com.google.api.gax.rpc.ApiCallContext;
1921
import com.google.api.gax.rpc.DeadlineExceededException;
2022
import com.google.api.gax.rpc.ResourceExhaustedException;
@@ -69,6 +71,8 @@ class RateLimitingServerStreamingCallable
6971

7072
private final ServerStreamingCallable<MutateRowsRequest, MutateRowsResponse> innerCallable;
7173

74+
private BigtableTracer bigtableTracer;
75+
7276
RateLimitingServerStreamingCallable(
7377
@Nonnull ServerStreamingCallable<MutateRowsRequest, MutateRowsResponse> innerCallable) {
7478
this.limiter = new ConditionalRateLimiter(DEFAULT_QPS);
@@ -84,8 +88,8 @@ public void call(
8488
limiter.acquire();
8589
stopwatch.stop();
8690
if (context.getTracer() instanceof BigtableTracer) {
87-
((BigtableTracer) context.getTracer())
88-
.batchRequestThrottled(stopwatch.elapsed(TimeUnit.NANOSECONDS));
91+
bigtableTracer = (BigtableTracer) context.getTracer();
92+
bigtableTracer.batchRequestThrottled(stopwatch.elapsed(TimeUnit.NANOSECONDS));
8993
}
9094
RateLimitingResponseObserver innerObserver = new RateLimitingResponseObserver(responseObserver);
9195
innerCallable.call(request, innerObserver, context);
@@ -104,7 +108,10 @@ static class ConditionalRateLimiter {
104108

105109
public ConditionalRateLimiter(long defaultQps) {
106110
limiter = RateLimiter.create(defaultQps);
107-
logger.info("Rate limiting is initiated (but disabled) with rate of " + defaultQps + " QPS.");
111+
logger.info(
112+
"Batch write flow control: rate limiter is initiated (but disabled) with rate of "
113+
+ defaultQps
114+
+ " QPS.");
108115
}
109116

110117
/**
@@ -128,7 +135,7 @@ public void tryDisable() {
128135
if (now.isAfter(nextTime)) {
129136
boolean wasEnabled = this.enabled.getAndSet(false);
130137
if (wasEnabled) {
131-
logger.info("Rate limiter is disabled.");
138+
logger.info("Batch write flow control: rate limiter is disabled.");
132139
}
133140
// No need to update nextRateUpdateTime, any new RateLimitInfo can enable rate limiting and
134141
// update the rate again.
@@ -139,7 +146,7 @@ public void tryDisable() {
139146
public void enable() {
140147
boolean wasEnabled = this.enabled.getAndSet(true);
141148
if (!wasEnabled) {
142-
logger.info("Rate limiter is enabled.");
149+
logger.info("Batch write flow control: rate limiter is enabled.");
143150
}
144151
}
145152

@@ -158,31 +165,49 @@ public double getRate() {
158165
* @param rate The new rate of the rate limiter.
159166
* @param period The period during which rate should not be updated again and the rate limiter
160167
* should not be disabled.
168+
* @param bigtableTracer The tracer for exporting client-side metrics.
161169
*/
162-
public void trySetRate(double rate, Duration period) {
170+
public void trySetRate(
171+
double rate,
172+
Duration period,
173+
BigtableTracer bigtableTracer,
174+
double factor,
175+
String statusString) {
163176
Instant nextTime = nextRateUpdateTime.get();
164177
Instant now = Instant.now();
165178

166179
if (now.isBefore(nextTime)) {
180+
if (bigtableTracer != null) {
181+
bigtableTracer.addBatchWriteFlowControlFactor(factor, statusString, false);
182+
}
167183
return;
168184
}
169185

170186
Instant newNextTime = now.plusSeconds(period.getSeconds());
171187

172188
if (!nextRateUpdateTime.compareAndSet(nextTime, newNextTime)) {
173189
// Someone else updated it already.
190+
if (bigtableTracer != null) {
191+
bigtableTracer.addBatchWriteFlowControlFactor(factor, statusString, false);
192+
}
174193
return;
175194
}
176195
final double oldRate = limiter.getRate();
177196
limiter.setRate(rate);
178197
logger.info(
179-
"Updated max rate from "
198+
"Batch write flow control: updated max rate from "
180199
+ oldRate
181200
+ " to "
182201
+ rate
202+
+ " applied factor "
203+
+ factor
183204
+ " with period "
184205
+ period.getSeconds()
185206
+ " seconds.");
207+
if (bigtableTracer != null) {
208+
bigtableTracer.setBatchWriteFlowControlTargetQps(rate);
209+
bigtableTracer.addBatchWriteFlowControlFactor(factor, statusString, true);
210+
}
186211
}
187212

188213
@VisibleForTesting
@@ -215,17 +240,21 @@ private boolean hasValidRateLimitInfo(MutateRowsResponse response) {
215240
// have presence even thought it's marked as "optional". Check the factor and
216241
// period to make sure they're not 0.
217242
if (!response.hasRateLimitInfo()) {
218-
logger.finest("Response carries no RateLimitInfo");
243+
logger.finest("Batch write flow control: response carries no RateLimitInfo");
219244
return false;
220245
}
221246

222247
if (response.getRateLimitInfo().getFactor() <= 0
223248
|| response.getRateLimitInfo().getPeriod().getSeconds() <= 0) {
224-
logger.finest("Response carries invalid RateLimitInfo=" + response.getRateLimitInfo());
249+
logger.finest(
250+
"Batch write flow control: response carries invalid RateLimitInfo="
251+
+ response.getRateLimitInfo());
225252
return false;
226253
}
227254

228-
logger.finest("Response carries valid RateLimitInfo=" + response.getRateLimitInfo());
255+
logger.finest(
256+
"Batch write flow control: response carries valid RateLimitInfo="
257+
+ response.getRateLimitInfo());
229258
return true;
230259
}
231260

@@ -236,7 +265,8 @@ protected void onResponseImpl(MutateRowsResponse response) {
236265
RateLimitInfo info = response.getRateLimitInfo();
237266
updateQps(
238267
info.getFactor(),
239-
Duration.ofSeconds(com.google.protobuf.util.Durations.toSeconds(info.getPeriod())));
268+
Duration.ofSeconds(com.google.protobuf.util.Durations.toSeconds(info.getPeriod())),
269+
extractStatus(null));
240270
} else {
241271
limiter.tryDisable();
242272
}
@@ -250,7 +280,15 @@ protected void onErrorImpl(Throwable t) {
250280
if (t instanceof DeadlineExceededException
251281
|| t instanceof UnavailableException
252282
|| t instanceof ResourceExhaustedException) {
253-
updateQps(MIN_FACTOR, DEFAULT_PERIOD);
283+
logger.info(
284+
"Batch write flow control: received error "
285+
+ extractStatus(t)
286+
+ " applying min factor "
287+
+ MIN_FACTOR
288+
+ " with period "
289+
+ DEFAULT_PERIOD
290+
+ " seconds.");
291+
updateQps(MIN_FACTOR, DEFAULT_PERIOD, extractStatus(t));
254292
}
255293
outerObserver.onError(t);
256294
}
@@ -260,11 +298,11 @@ protected void onCompleteImpl() {
260298
outerObserver.onComplete();
261299
}
262300

263-
private void updateQps(double factor, Duration period) {
301+
private void updateQps(double factor, Duration period, String statusString) {
264302
double cappedFactor = Math.min(Math.max(factor, MIN_FACTOR), MAX_FACTOR);
265303
double currentRate = limiter.getRate();
266304
double cappedRate = Math.min(Math.max(currentRate * cappedFactor, MIN_QPS), MAX_QPS);
267-
limiter.trySetRate(cappedRate, period);
305+
limiter.trySetRate(cappedRate, period, bigtableTracer, cappedFactor, statusString);
268306
}
269307
}
270308

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracer.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

1818
import com.google.api.core.BetaApi;
19+
import com.google.api.core.InternalApi;
1920
import com.google.api.gax.rpc.ApiCallContext;
2021
import com.google.api.gax.tracing.ApiTracer;
2122
import com.google.api.gax.tracing.BaseApiTracer;
@@ -115,4 +116,24 @@ public void grpcMessageSent() {
115116
public void setTotalTimeoutDuration(Duration totalTimeoutDuration) {
116117
// noop
117118
}
119+
120+
/**
121+
* Record the target QPS for batch write flow control.
122+
*
123+
* @param targetQps The new target QPS for the client.
124+
*/
125+
@InternalApi
126+
public void setBatchWriteFlowControlTargetQps(double targetQps) {}
127+
128+
/**
129+
* Record the factors received from server-side for batch write flow control. The factors are
130+
* capped by min and max allowed factor values. Status and whether the factor was actually applied
131+
* are also recorded.
132+
*
133+
* @param factor Capped factor from server-side. For non-OK response, min factor is used.
134+
* @param statusString Status code of the request.
135+
* @param applied Whether the factor was actually applied.
136+
*/
137+
@InternalApi
138+
public void addBatchWriteFlowControlFactor(double factor, String statusString, boolean applied) {}
118139
}

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsConstants.java

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,9 @@
2020
import com.google.common.collect.ImmutableMap;
2121
import com.google.common.collect.ImmutableSet;
2222
import io.opentelemetry.api.common.AttributeKey;
23-
import io.opentelemetry.sdk.metrics.Aggregation;
24-
import io.opentelemetry.sdk.metrics.InstrumentSelector;
25-
import io.opentelemetry.sdk.metrics.InstrumentType;
26-
import io.opentelemetry.sdk.metrics.View;
23+
import io.opentelemetry.sdk.metrics.*;
24+
25+
import javax.annotation.Nullable;
2726
import java.util.Map;
2827
import java.util.Set;
2928
import java.util.stream.Collectors;
@@ -49,6 +48,7 @@ public class BuiltinMetricsConstants {
4948
static final AttributeKey<String> METHOD_KEY = AttributeKey.stringKey("method");
5049
static final AttributeKey<String> STATUS_KEY = AttributeKey.stringKey("status");
5150
static final AttributeKey<String> CLIENT_UID_KEY = AttributeKey.stringKey("client_uid");
51+
static final AttributeKey<Boolean> APPLIED_KEY = AttributeKey.booleanKey("applied");
5252

5353
static final AttributeKey<String> TRANSPORT_TYPE = AttributeKey.stringKey("transport_type");
5454
static final AttributeKey<String> TRANSPORT_REGION = AttributeKey.stringKey("transport_region");
@@ -95,6 +95,9 @@ public class BuiltinMetricsConstants {
9595
static final String CLIENT_BLOCKING_LATENCIES_NAME = "throttling_latencies";
9696
static final String PER_CONNECTION_ERROR_COUNT_NAME = "per_connection_error_count";
9797
static final String OUTSTANDING_RPCS_PER_CHANNEL_NAME = "connection_pool/outstanding_rpcs";
98+
static final String BATCH_WRITE_FLOW_CONTROL_TARGET_QPS_NAME =
99+
"batch_write_flow_control_target_qps";
100+
static final String BATCH_WRITE_FLOW_CONTROL_FACTOR_NAME = "batch_write_flow_control_factor";
98101

99102
// Start allow list of metrics that will be exported as internal
100103
public static final Map<String, Set<String>> GRPC_METRICS =
@@ -210,6 +213,8 @@ public class BuiltinMetricsConstants {
210213
70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 130.0,
211214
135.0, 140.0, 145.0, 150.0, 155.0, 160.0, 165.0, 170.0, 175.0, 180.0, 185.0, 190.0,
212215
195.0, 200.0));
216+
private static final Aggregation AGGREGATION_BATCH_WRITE_FLOW_CONTROL_FACTOR_HISTOGRAM =
217+
Aggregation.explicitBucketHistogram(ImmutableList.of(0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3));
213218

214219
static final Set<AttributeKey> COMMON_ATTRIBUTES =
215220
ImmutableSet.of(
@@ -225,7 +230,7 @@ public class BuiltinMetricsConstants {
225230
static void defineView(
226231
ImmutableMap.Builder<InstrumentSelector, View> viewMap,
227232
String id,
228-
Aggregation aggregation,
233+
@Nullable Aggregation aggregation,
229234
InstrumentType type,
230235
String unit,
231236
Set<AttributeKey> attributes) {
@@ -242,14 +247,14 @@ static void defineView(
242247
COMMON_ATTRIBUTES.stream().map(AttributeKey::getKey).collect(Collectors.toSet()))
243248
.addAll(attributes.stream().map(AttributeKey::getKey).collect(Collectors.toSet()))
244249
.build();
245-
View view =
250+
ViewBuilder viewBuilder =
246251
View.builder()
247252
.setName(METER_NAME + id)
248-
.setAggregation(aggregation)
249-
.setAttributeFilter(attributesFilter)
250-
.build();
251-
252-
viewMap.put(selector, view);
253+
.setAttributeFilter(attributesFilter);
254+
if (aggregation != null) {
255+
viewBuilder.setAggregation(aggregation);
256+
}
257+
viewMap.put(selector, viewBuilder.build());
253258
}
254259

255260
// uses cloud.BigtableClient schema
@@ -367,7 +372,23 @@ public static Map<InstrumentSelector, View> getAllViews() {
367372
.addAll(COMMON_ATTRIBUTES)
368373
.add(STREAMING_KEY, STATUS_KEY)
369374
.build());
370-
375+
defineView(
376+
views,
377+
BATCH_WRITE_FLOW_CONTROL_TARGET_QPS_NAME,
378+
null,
379+
InstrumentType.GAUGE,
380+
"1",
381+
ImmutableSet.<AttributeKey>builder().addAll(COMMON_ATTRIBUTES).build());
382+
defineView(
383+
views,
384+
BATCH_WRITE_FLOW_CONTROL_FACTOR_NAME,
385+
AGGREGATION_BATCH_WRITE_FLOW_CONTROL_FACTOR_HISTOGRAM,
386+
InstrumentType.HISTOGRAM,
387+
"1",
388+
ImmutableSet.<AttributeKey>builder()
389+
.addAll(COMMON_ATTRIBUTES)
390+
.add(STATUS_KEY, APPLIED_KEY)
391+
.build());
371392
return views.build();
372393
}
373394
}

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import static com.google.api.gax.tracing.ApiTracerFactory.OperationType;
1919
import static com.google.api.gax.util.TimeConversionUtils.toJavaTimeDuration;
20+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.APPLIED_KEY;
2021
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CLIENT_NAME_KEY;
2122
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CLUSTER_ID_KEY;
2223
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.METHOD_KEY;
@@ -41,6 +42,7 @@
4142
import com.google.gson.reflect.TypeToken;
4243
import io.grpc.Deadline;
4344
import io.opentelemetry.api.common.Attributes;
45+
import io.opentelemetry.api.metrics.DoubleGauge;
4446
import io.opentelemetry.api.metrics.DoubleHistogram;
4547
import io.opentelemetry.api.metrics.LongCounter;
4648
import java.time.Duration;
@@ -136,6 +138,8 @@ static TransportAttrs create(@Nullable String locality, @Nullable String backend
136138
private final DoubleHistogram remainingDeadlineHistogram;
137139
private final LongCounter connectivityErrorCounter;
138140
private final LongCounter retryCounter;
141+
private final DoubleGauge batchWriteFlowControlTargetQps;
142+
private final DoubleHistogram batchWriteFlowControlFactorHistogram;
139143

140144
BuiltinMetricsTracer(
141145
OperationType operationType,
@@ -150,7 +154,9 @@ static TransportAttrs create(@Nullable String locality, @Nullable String backend
150154
DoubleHistogram applicationBlockingLatenciesHistogram,
151155
DoubleHistogram deadlineHistogram,
152156
LongCounter connectivityErrorCounter,
153-
LongCounter retryCounter) {
157+
LongCounter retryCounter,
158+
DoubleGauge batchWriteFlowControlTargetQps,
159+
DoubleHistogram batchWriteFlowControlFactorHistogram) {
154160
this.operationType = operationType;
155161
this.spanName = spanName;
156162
this.baseAttributes = attributes;
@@ -165,6 +171,8 @@ static TransportAttrs create(@Nullable String locality, @Nullable String backend
165171
this.remainingDeadlineHistogram = deadlineHistogram;
166172
this.connectivityErrorCounter = connectivityErrorCounter;
167173
this.retryCounter = retryCounter;
174+
this.batchWriteFlowControlTargetQps = batchWriteFlowControlTargetQps;
175+
this.batchWriteFlowControlFactorHistogram = batchWriteFlowControlFactorHistogram;
168176
}
169177

170178
@Override
@@ -496,4 +504,23 @@ private static double convertToMs(long nanoSeconds) {
496504
double toMs = 1e-6;
497505
return nanoSeconds * toMs;
498506
}
507+
508+
@Override
509+
public void setBatchWriteFlowControlTargetQps(double targetQps) {
510+
Attributes attributes = baseAttributes.toBuilder().put(METHOD_KEY, spanName.toString()).build();
511+
512+
batchWriteFlowControlTargetQps.set(targetQps, attributes);
513+
}
514+
515+
@Override
516+
public void addBatchWriteFlowControlFactor(double factor, String statusString, boolean applied) {
517+
Attributes attributes =
518+
baseAttributes.toBuilder()
519+
.put(METHOD_KEY, spanName.toString())
520+
.put(STATUS_KEY, statusString)
521+
.put(APPLIED_KEY, applied)
522+
.build();
523+
524+
batchWriteFlowControlFactorHistogram.record(factor, attributes);
525+
}
499526
}

0 commit comments

Comments
 (0)