File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -323,9 +323,8 @@ export async function preprocessExecution(
323323 }
324324
325325 // ========== STEPS 3.5–6: Preflight Gates ==========
326- // Read-only gates run concurrently (ban + subscription, then usage). The
327- // rate-limit gate debits a token, so it runs sequentially only after ban and
328- // usage pass. Failures apply in fixed precedence: ban 403 → usage 402 → rate 429.
326+ // Read-only gates (ban, subscription, usage) run concurrently; the stateful
327+ // rate-limit gate runs after they pass. Precedence: ban 403 → usage 402 → rate 429.
329328
330329 /**
331330 * A failing gate's deferred outcome: the response to return, plus an optional
@@ -643,7 +642,6 @@ export async function preprocessExecution(
643642 const usageResult = await usageCheckTask
644643 const usageSnapshot = usageResult . snapshot
645644
646- // Precedence: ban (403) wins over usage (402).
647645 const readGateFailure = banFailure ?? usageResult . failure
648646 if ( readGateFailure ) {
649647 if ( readGateFailure . recordError ) {
Original file line number Diff line number Diff line change @@ -139,8 +139,6 @@ export const bedrockProvider: ProviderConfig = {
139139 }
140140 }
141141
142- // AWS SDK clients own a per-client connection pool and are meant to be reused.
143- // Keyed by region + credential identity (a rotated key pair yields a new key).
144142 const client = getCachedProviderClient (
145143 `bedrock::${ region } ::${ request . bedrockAccessKeyId ?? 'default-chain' } ` ,
146144 ( ) => new BedrockRuntimeClient ( clientConfig )
Original file line number Diff line number Diff line change @@ -3,8 +3,10 @@ import { LRUCache } from 'lru-cache'
33 const CLIENT_CACHE_MAX_ENTRIES = 1_000
44 const CLIENT_CACHE_TTL_MS = 30 * 60 * 1_000
55
6- // updateAgeOnGet makes the TTL idle-based, so a continuously-used client keeps
7- // its warm keep-alive connections while idle keys age out.
6+ /**
7+ * `updateAgeOnGet` makes the TTL idle-based: a continuously-used client keeps its
8+ * warm keep-alive connections, while idle keys age out.
9+ */
810 const clientCache = new LRUCache < string , object > ( {
911 max : CLIENT_CACHE_MAX_ENTRIES ,
1012 ttl : CLIENT_CACHE_TTL_MS ,
Original file line number Diff line number Diff line change @@ -135,8 +135,6 @@ export const vllmProvider: ProviderConfig = {
135135 }
136136
137137 const apiKey = request . apiKey || env . VLLM_API_KEY || 'empty'
138- // A pinned endpoint gets its own undici Agent, so reuse keeps connections
139- // warm. DNS re-validation still runs every request; a new IP rekeys.
140138 const vllm = getCachedProviderClient (
141139 `vllm::${ apiKey } ::${ baseUrl } ::${ pinnedIP ?? 'no-pin' } ` ,
142140 ( ) =>
You can’t perform that action at this time.
0 commit comments