Skip to content

Commit 980aaf9

Browse files
committed
perf(providers): memoize SDK clients where the pool is per-client (bedrock, vllm)
Generalize the Anthropic client cache into one shared memoizer (providers/client-cache.ts) and apply it only where each new client owns its own connection pool, so that reuse actually keeps connections warm:

- bedrock: AWS SDK clients hold a per-client connection pool (reuse is the AWS best practice). Keyed by region + credential identity.
- vllm: a pinned endpoint creates its own undici Agent per call; keyed by the resolved IP so DNS re-validation still runs on each request.
- anthropic + azure-anthropic: migrated onto the shared memoizer.

Deliberately NOT applied to the OpenAI-compatible providers, groq, cerebras, or google: their SDKs share a process-global keep-alive pool (Node openai-sdk module singleton agent; anthropic/global undici), so a fresh client per request already reuses connections and memoization would add complexity with almost no benefit. litellm uses a plain shared-agent client (no pinning) and is likewise skipped.

Bounded LRU (max 1000 entries, 30-minute idle TTL) with no close-on-eviction, avoiding the unbounded-growth and eviction-closes-in-use-client failure modes seen in similar client caches.
1 parent b87f870 commit 980aaf9

8 files changed

Lines changed: 181 additions & 218 deletions

File tree

apps/sim/providers/anthropic/client-cache.test.ts

Lines changed: 0 additions & 162 deletions
This file was deleted.

apps/sim/providers/anthropic/client-cache.ts

Lines changed: 0 additions & 44 deletions
This file was deleted.

apps/sim/providers/anthropic/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import Anthropic from '@anthropic-ai/sdk'
22
import { createLogger } from '@sim/logger'
33
import type { StreamingExecution } from '@/executor/types'
4-
import { getCachedAnthropicClient } from '@/providers/anthropic/client-cache'
54
import { executeAnthropicProviderRequest } from '@/providers/anthropic/core'
5+
import { getCachedProviderClient } from '@/providers/client-cache'
66
import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
77
import type { ProviderConfig, ProviderRequest, ProviderResponse } from '@/providers/types'
88

@@ -23,8 +23,8 @@ export const anthropicProvider: ProviderConfig = {
2323
providerId: 'anthropic',
2424
providerLabel: 'Anthropic',
2525
createClient: (apiKey, useNativeStructuredOutputs) => {
26-
const cacheKey = `${apiKey}::${useNativeStructuredOutputs ? 'beta' : 'default'}`
27-
return getCachedAnthropicClient(
26+
const cacheKey = `anthropic::${apiKey}::${useNativeStructuredOutputs ? 'beta' : 'default'}`
27+
return getCachedProviderClient(
2828
cacheKey,
2929
() =>
3030
new Anthropic({

apps/sim/providers/azure-anthropic/index.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ import { createLogger } from '@sim/logger'
33
import { env } from '@/lib/core/config/env'
44
import { createPinnedFetch, validateUrlWithDNS } from '@/lib/core/security/input-validation.server'
55
import type { StreamingExecution } from '@/executor/types'
6-
import { getCachedAnthropicClient } from '@/providers/anthropic/client-cache'
76
import { executeAnthropicProviderRequest } from '@/providers/anthropic/core'
7+
import { getCachedProviderClient } from '@/providers/client-cache'
88
import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
99
import type { ProviderConfig, ProviderRequest, ProviderResponse } from '@/providers/types'
1010

@@ -73,13 +73,14 @@ export const azureAnthropicProvider: ProviderConfig = {
7373
providerLabel: 'Azure Anthropic',
7474
createClient: (apiKey, useNativeStructuredOutputs) => {
7575
const cacheKey = [
76+
'azure-anthropic',
7677
apiKey,
7778
baseURL,
7879
anthropicVersion,
7980
pinnedIP ?? 'no-pin',
8081
useNativeStructuredOutputs ? 'beta' : 'default',
8182
].join('::')
82-
return getCachedAnthropicClient(
83+
return getCachedProviderClient(
8384
cacheKey,
8485
() =>
8586
new Anthropic({

apps/sim/providers/bedrock/index.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import {
2424
generateToolUseId,
2525
getBedrockInferenceProfileId,
2626
} from '@/providers/bedrock/utils'
27+
import { getCachedProviderClient } from '@/providers/client-cache'
2728
import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
2829
import { createStreamingExecution } from '@/providers/streaming-execution'
2930
import { enrichLastModelSegment } from '@/providers/trace-enrichment'
@@ -138,7 +139,14 @@ export const bedrockProvider: ProviderConfig = {
138139
}
139140
}
140141

141-
const client = new BedrockRuntimeClient(clientConfig)
142+
// Memoized: each BedrockRuntimeClient owns its own connection pool (AWS SDK
143+
// best practice is to reuse the client), so reusing it keeps connections warm
144+
// across requests. Keyed by region + credential identity (a rotated key pair
145+
// changes the access key id and so yields a fresh client).
146+
const client = getCachedProviderClient(
147+
`bedrock::${region}::${request.bedrockAccessKeyId ?? 'default-chain'}`,
148+
() => new BedrockRuntimeClient(clientConfig)
149+
)
142150

143151
const messages: BedrockMessage[] = []
144152
const systemContent: SystemContentBlock[] = []
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/**
2+
* @vitest-environment node
3+
*/
4+
import { describe, expect, it, vi } from 'vitest'
5+
import { getCachedProviderClient } from '@/providers/client-cache'
6+
7+
/**
8+
* Builds a fresh fake "client" object on every call so identity comparisons
9+
* (`toBe`) tell us whether the cache returned the memoized instance or a new one
10+
* from the factory. We never construct a real SDK client — these tests exercise
11+
* the cache, not any provider SDK.
12+
*/
13+
function makeFactory() {
14+
return vi.fn(() => ({}) as object)
15+
}
16+
17+
/**
18+
* Generates a unique suffix per test so distinct tests never collide on cache
19+
* keys. The cache util exposes no reset hook, so isolation is achieved by
20+
* namespacing keys rather than clearing shared state.
21+
*/
22+
let keyCounter = 0
23+
function uniqueNs(): string {
24+
keyCounter += 1
25+
return `ns-${keyCounter}-${Date.now()}`
26+
}
27+
28+
describe('getCachedProviderClient', () => {
29+
it('returns the SAME instance for an identical key and runs the factory once (memoized)', () => {
30+
const key = `anthropic::${uniqueNs()}::default`
31+
const factory = makeFactory()
32+
33+
const first = getCachedProviderClient(key, factory)
34+
const second = getCachedProviderClient(key, factory)
35+
36+
expect(second).toBe(first)
37+
expect(factory).toHaveBeenCalledTimes(1)
38+
})
39+
40+
it('returns a DIFFERENT instance for a different apiKey (tenant isolation)', () => {
41+
const ns = uniqueNs()
42+
const factoryA = makeFactory()
43+
const factoryB = makeFactory()
44+
45+
const tenantA = getCachedProviderClient(`anthropic::${ns}-tenant-a::default`, factoryA)
46+
const tenantB = getCachedProviderClient(`anthropic::${ns}-tenant-b::default`, factoryB)
47+
48+
expect(tenantB).not.toBe(tenantA)
49+
expect(factoryA).toHaveBeenCalledTimes(1)
50+
expect(factoryB).toHaveBeenCalledTimes(1)
51+
})
52+
53+
it('namespaces by provider: the same apiKey under different provider prefixes does not collide', () => {
54+
const ns = uniqueNs()
55+
const apiKey = `${ns}-shared-key`
56+
const anthropicFactory = makeFactory()
57+
const bedrockFactory = makeFactory()
58+
59+
const anthropicClient = getCachedProviderClient(`anthropic::${apiKey}`, anthropicFactory)
60+
const bedrockClient = getCachedProviderClient(`bedrock::${apiKey}`, bedrockFactory)
61+
62+
expect(bedrockClient).not.toBe(anthropicClient)
63+
})
64+
65+
it('treats every distinct key dimension as a distinct client', () => {
66+
const ns = uniqueNs()
67+
const base = `azure-anthropic::${ns}-key::https://a.example.com::2023-06-01::10.0.0.1::default`
68+
const baseFactory = makeFactory()
69+
const baseClient = getCachedProviderClient(base, baseFactory)
70+
71+
const variants = [
72+
`azure-anthropic::${ns}-key::https://b.example.com::2023-06-01::10.0.0.1::default`,
73+
`azure-anthropic::${ns}-key::https://a.example.com::2024-10-22::10.0.0.1::default`,
74+
`azure-anthropic::${ns}-key::https://a.example.com::2023-06-01::10.0.0.2::default`,
75+
`azure-anthropic::${ns}-key::https://a.example.com::2023-06-01::no-pin::default`,
76+
`azure-anthropic::${ns}-key::https://a.example.com::2023-06-01::10.0.0.1::beta`,
77+
]
78+
79+
for (const key of variants) {
80+
const factory = makeFactory()
81+
const client = getCachedProviderClient(key, factory)
82+
expect(client).not.toBe(baseClient)
83+
expect(factory).toHaveBeenCalledTimes(1)
84+
}
85+
})
86+
87+
it('evicts the least-recently-used entry once the cache cap is exceeded', () => {
88+
const ns = uniqueNs()
89+
const CAP = 1_000
90+
91+
const oldestKey = `evict::${ns}::0`
92+
const oldestFactory = makeFactory()
93+
getCachedProviderClient(oldestKey, oldestFactory)
94+
expect(oldestFactory).toHaveBeenCalledTimes(1)
95+
96+
// Fill the remaining capacity, then push one past the cap. The oldest key has
97+
// not been touched since insertion, so it is the LRU eviction victim.
98+
for (let i = 1; i <= CAP; i += 1) {
99+
getCachedProviderClient(`evict::${ns}::${i}`, makeFactory())
100+
}
101+
102+
const reFactory = makeFactory()
103+
getCachedProviderClient(oldestKey, reFactory)
104+
expect(reFactory).toHaveBeenCalledTimes(1)
105+
expect(oldestFactory).toHaveBeenCalledTimes(1)
106+
})
107+
})

0 commit comments

Comments
 (0)