diff --git a/src/cli/index.ts b/src/cli/index.ts index b3c29d6..f86751f 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -77,12 +77,19 @@ Available providers for storing and retrieving memories: Extracts memories via LLM, chunks + embeds extracted content, hybrid BM25 + vector search. Requires: OPENAI_API_KEY (for memory extraction via gpt-4o-mini + embeddings) + sandra Sandra - Semantic graph database with typed refs and entity factories. + Extracts entities + facts per session via Claude, pushes one sandra_batch per session, + retrieves via semantic search on the lme_fact factory scoped by instance_id. + Requires: ANTHROPIC_API_KEY (extractor) + SANDRA_URL (MCP HTTP server, default http://localhost:8090/mcp) + Optional: SANDRA_TOKEN (bearer token for auth), SANDRA_EXTRACTOR_MODEL (default claude-haiku-4-5-20251001) + Usage: -p supermemory Use Supermemory as the memory provider -p mem0 Use Mem0 as the memory provider -p zep Use Zep as the memory provider -p filesystem Use file-based memory (CLAUDE.md style) -p rag Use hybrid RAG memory (OpenClaw/QMD style) + -p sandra Use Sandra semantic graph database `) } diff --git a/src/providers/index.ts b/src/providers/index.ts index 5f71566..c33c74f 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -5,6 +5,7 @@ import { Mem0Provider } from "./mem0" import { ZepProvider } from "./zep" import { FilesystemProvider } from "./filesystem" import { RAGProvider } from "./rag" +import { SandraProvider } from "./sandra" const providers: Record Provider> = { supermemory: SupermemoryProvider, @@ -12,6 +13,7 @@ const providers: Record Provider> = { zep: ZepProvider, filesystem: FilesystemProvider, rag: RAGProvider, + sandra: SandraProvider, } export function createProvider(name: ProviderName): Provider { @@ -39,4 +41,4 @@ export function getProviderInfo(name: ProviderName): { } } -export { SupermemoryProvider, Mem0Provider, ZepProvider, FilesystemProvider, RAGProvider } +export { SupermemoryProvider, 
Mem0Provider, ZepProvider, FilesystemProvider, RAGProvider, SandraProvider } diff --git a/src/providers/sandra/extractor.ts b/src/providers/sandra/extractor.ts new file mode 100644 index 0000000..64f65cc --- /dev/null +++ b/src/providers/sandra/extractor.ts @@ -0,0 +1,207 @@ +/** + * LLM-based session extractor. Calls Claude Haiku via the Vercel AI SDK + * (already a memorybench dependency) to convert raw chat turns into the + * {entities, facts} JSON that the Sandra graph writer consumes. + * + * Mirrors `sandra/benchmark/longmemeval/src/ingest_claude.py`: + * - one API call per session + * - parse JSON from the response, stripping markdown fences if present + * - one retry with a stricter instruction on JSONDecodeError + * + * Model default is `claude-haiku-4-5-20251001` to match the Python default. + * Override via the `SANDRA_EXTRACTOR_MODEL` env var if needed. + */ + +import { generateText, type LanguageModel } from "ai" +import { createAnthropic } from "@ai-sdk/anthropic" +import { createOpenAI } from "@ai-sdk/openai" +import type { UnifiedMessage } from "../../types/unified" +import { logger } from "../../utils/logger" +import { EXTRACT_SYSTEM_PROMPT } from "./prompts" +import type { SessionExtraction } from "./schema" + +const DEFAULT_ANTHROPIC_MODEL = "claude-haiku-4-5-20251001" +const DEFAULT_OPENAI_MODEL = "gpt-4o-mini" +const MAX_OUTPUT_TOKENS = 8000 + +export interface ExtractorConfig { + apiKey: string // Anthropic key when present; falls through to OPENAI_API_KEY if empty. + model?: string +} + +export interface ExtractorStats { + inputTokens: number + outputTokens: number + apiCalls: number + parseFailures: number +} + +/** + * Dual-provider extractor that mirrors the fallback logic of the Python + * benchmark's `llm_client.py`: prefer Anthropic (Claude Haiku) when an API + * key is configured, otherwise fall back to OpenAI (gpt-4o-mini). The model + * name can be overridden via the `SANDRA_EXTRACTOR_MODEL` env var. 
+ */ +export class SessionExtractor { + private readonly model: LanguageModel + readonly modelName: string + readonly providerName: "anthropic" | "openai" + readonly stats: ExtractorStats = { + inputTokens: 0, + outputTokens: 0, + apiCalls: 0, + parseFailures: 0, + } + + constructor(config: ExtractorConfig) { + const override = config.model || process.env.SANDRA_EXTRACTOR_MODEL + const anthropicKey = config.apiKey || process.env.ANTHROPIC_API_KEY || "" + const openaiKey = process.env.OPENAI_API_KEY || "" + + if (anthropicKey) { + const provider = createAnthropic({ apiKey: anthropicKey }) + this.modelName = override || DEFAULT_ANTHROPIC_MODEL + this.model = provider(this.modelName) + this.providerName = "anthropic" + } else if (openaiKey) { + const provider = createOpenAI({ apiKey: openaiKey }) + this.modelName = override || DEFAULT_OPENAI_MODEL + this.model = provider(this.modelName) + this.providerName = "openai" + } else { + throw new Error( + "Sandra extractor needs an LLM key: set ANTHROPIC_API_KEY (Claude Haiku) or OPENAI_API_KEY (gpt-4o-mini)." + ) + } + logger.info( + `Sandra extractor using ${this.providerName} / ${this.modelName}` + ) + } + + async extract( + messages: UnifiedMessage[], + sessionTimestamp?: string + ): Promise { + const prompt = this.buildSessionPrompt(messages, sessionTimestamp) + const first = await this.callModel(EXTRACT_SYSTEM_PROMPT, prompt) + + const parsed = tryParseJson(first) + if (parsed) return sanitize(parsed) + + this.stats.parseFailures += 1 + logger.debug("Extractor got non-JSON response; retrying once") + + const second = await this.callModel( + EXTRACT_SYSTEM_PROMPT + + "\nCRITICAL: Output valid JSON only. No prose, no markdown fences.", + prompt + "\n\nPrevious output was not valid JSON. Re-emit valid JSON only." 
+ ) + const retried = tryParseJson(second) + if (retried) return sanitize(retried) + + logger.warn("Extractor failed twice; returning empty extraction") + return { entities: [], facts: [] } + } + + private buildSessionPrompt(messages: UnifiedMessage[], ts?: string): string { + const header = ts ? `Session timestamp: ${ts}\n\n` : "" + const lines = messages.map((m, i) => `[turn ${i} ${m.role}]\n${m.content}`) + return header + lines.join("\n\n") + } + + private async callModel(system: string, prompt: string): Promise { + const result = await generateText({ + model: this.model, + system, + prompt, + maxOutputTokens: MAX_OUTPUT_TOKENS, + }) + this.stats.apiCalls += 1 + const usage = result.usage + if (usage) { + this.stats.inputTokens += usage.inputTokens ?? 0 + this.stats.outputTokens += usage.outputTokens ?? 0 + } + return (result.text ?? "").trim() + } +} + +function tryParseJson(text: string): SessionExtraction | null { + let body = text.trim() + if (body.startsWith("```")) { + // strip leading fence incl. optional language tag + body = body.replace(/^```[a-zA-Z]*\n?/, "") + if (body.endsWith("```")) body = body.slice(0, -3) + body = body.trim() + } + if (!body.startsWith("{")) { + // some models prefix a single explanatory line; grab the first {...} block + const match = body.match(/\{[\s\S]*\}/) + if (!match) return null + body = match[0] + } + try { + const obj = JSON.parse(body) + if (!obj || typeof obj !== "object") return null + const entities = Array.isArray(obj.entities) ? obj.entities : [] + const facts = Array.isArray(obj.facts) ? obj.facts : [] + return { entities, facts } + } catch { + return null + } +} + +function sanitize(raw: SessionExtraction): SessionExtraction { + const entities = raw.entities + .filter((e) => e && typeof e.name === "string" && e.name.trim() !== "") + .map((e) => ({ + name: String(e.name).trim(), + kind: (e.kind && String(e.kind)) || "other", + notes: e.notes ? 
String(e.notes).slice(0, 240) : "", + })) + + const facts = raw.facts + .filter( + (f) => + f && + typeof f.predicate === "string" && + f.predicate.trim() !== "" && + typeof f.statement === "string" && + f.statement.trim() !== "" + ) + .map((f) => { + const typedRefs: Record = {} + const rawTyped = (f as { typed_refs?: unknown }).typed_refs + if (rawTyped && typeof rawTyped === "object") { + for (const [k, v] of Object.entries(rawTyped as Record)) { + const num = Number(v) + if (!Number.isNaN(num) && Number.isFinite(num) && k.trim() !== "") { + typedRefs[k.trim()] = num + } + } + } + let turnIdx = 0 + const rawTurn = (f as { turn_idx?: unknown }).turn_idx + if (typeof rawTurn === "number") turnIdx = Math.floor(rawTurn) + else if (typeof rawTurn === "string") { + const n = parseInt(rawTurn, 10) + if (!Number.isNaN(n)) turnIdx = n + } + return { + predicate: String(f.predicate).trim(), + statement: String(f.statement), + subject: (f.subject && String(f.subject)) || "user", + source: ((f.source && String(f.source)) || "user") as + | "user" + | "assistant" + | "synthesis", + object: (f.object && String(f.object)) || "", + value: (f.value && String(f.value)) || "", + event_date: (f.event_date && String(f.event_date)) || "", + turn_idx: turnIdx, + typed_refs: typedRefs, + } + }) + + return { entities, facts } +} diff --git a/src/providers/sandra/index.ts b/src/providers/sandra/index.ts new file mode 100644 index 0000000..ec07299 --- /dev/null +++ b/src/providers/sandra/index.ts @@ -0,0 +1,645 @@ +/** + * SandraProvider — memorybench adapter for the Sandra semantic graph database. + * + * Design notes: + * - Ingestion mirrors the Python harness's two-stage pipeline: LLM-extract + * entities + facts from each session, then push one sandra_batch per + * session. The extracted `{instance_id=containerTag}` ref is how + * per-question memory scoping is enforced at retrieval time. 
+ * - Retrieval is single-shot: sandra_semantic_search on lme_fact storage, + * client-filter by instance_id == containerTag, return top-N. No + * multi-turn tool-use agent — that would be unfair vs Mem0/Zep, which + * return static search results. This is Sandra's out-of-the-box behavior. + * - clear() is a best-effort no-op like Supermemory's — test runs use a + * dedicated DB (SANDRA_DB=benchmark_mb) so physical isolation comes from + * the database, not per-containerTag deletion. + */ + +import type { + Provider, + ProviderConfig, + IngestOptions, + IngestResult, + SearchOptions, + IndexingProgressCallback, +} from "../../types/provider" +import type { UnifiedSession } from "../../types/unified" +import { logger } from "../../utils/logger" +import { SandraMCPClient, type SandraEntityHit } from "./mcp-client" +import { SessionExtractor } from "./extractor" +import { + FACTORY_ENTITY, + FACTORY_FACT, + FACTORY_SESSION, + FACTORY_SESSION_RAW, + VERB_ABOUT, + VERB_MENTIONS, + VERB_STATES, + type ExtractedEntity, + type ExtractedFact, +} from "./schema" +import { SANDRA_PROMPTS, type SandraSearchHit } from "./prompts" + +const DEFAULT_SANDRA_URL = process.env.SANDRA_URL || "http://localhost:8090/mcp" +const DEFAULT_SANDRA_TOKEN = process.env.SANDRA_TOKEN || "" + +export class SandraProvider implements Provider { + name = "sandra" + prompts = SANDRA_PROMPTS + concurrency = { + default: 4, + ingest: 4, + indexing: 1, + // Sandra's MCP HTTP server is a single-threaded PHP process; bursty + // parallel search requests can drop keep-alive connections. Keep search + // concurrency modest — the MCP client also retries on socket errors. + search: 2, + } + + private mcp: SandraMCPClient | null = null + private extractor: SessionExtractor | null = null + // One global batch-embed covers all containerTags because the factory-level + // embed_all walks every unembedded `lme_fact` entity, regardless of + // instance_id. 
We run it once per provider lifetime; subsequent + // awaitIndexing() calls piggy-back on the same promise. + private globalEmbedPromise: Promise | null = null + + async initialize(config: ProviderConfig): Promise { + const baseUrl = (config.baseUrl as string | undefined) || DEFAULT_SANDRA_URL + const token = (config.token as string | undefined) || DEFAULT_SANDRA_TOKEN || undefined + this.mcp = new SandraMCPClient({ url: baseUrl, token }) + this.extractor = new SessionExtractor({ apiKey: config.apiKey }) + logger.info(`Initialized Sandra provider (url=${baseUrl})`) + } + + async ingest( + sessions: UnifiedSession[], + options: IngestOptions + ): Promise { + if (!this.mcp || !this.extractor) throw new Error("Provider not initialized") + + const documentIds: string[] = [] + const INNER_CONCURRENCY = Math.max(1, Number(process.env.SANDRA_SESSION_PARALLELISM ?? 6)) + + // Run sessions in parallel chunks. A single LongMemEval question can have + // 40+ sessions; serial ingestion means Sandra spends most of its time + // idle. Cap with SANDRA_SESSION_PARALLELISM (default 6) to avoid pounding + // the LLM extractor or the MCP server. + for (let i = 0; i < sessions.length; i += INNER_CONCURRENCY) { + const batch = sessions.slice(i, i + INNER_CONCURRENCY) + const results = await Promise.all(batch.map((s) => this.ingestOneSession(options.containerTag, s))) + for (const ids of results) documentIds.push(...ids) + } + + return { documentIds } + } + + private async ingestOneSession( + containerTag: string, + session: UnifiedSession + ): Promise { + if (!this.mcp || !this.extractor) throw new Error("Provider not initialized") + const formattedDate = (session.metadata?.formattedDate as string) || "" + const isoDate = (session.metadata?.date as string) || "" + const sessionTimestamp = formattedDate || isoDate + + // Always dump the raw session transcript as an lme_session_raw entity, + // even if the structured extraction fails later. 
The raw storage acts + // as a verbatim fallback that semantic search can hit when the + // extracted fact graph misses a detail. + const rawTranscript = renderSessionTranscript(session, sessionTimestamp) + + let extracted + try { + extracted = await this.extractor.extract(session.messages, sessionTimestamp) + } catch (e) { + logger.warn(`Extraction failed for ${session.sessionId}: ${e}`) + // Still push the raw transcript so retrieval at least has something. + await this.pushRawSessionOnly(containerTag, session.sessionId, sessionTimestamp, rawTranscript).catch(() => undefined) + return [] + } + + const batchPayload = buildBatchPayload( + containerTag, + session.sessionId, + sessionTimestamp, + extracted.entities, + extracted.facts, + rawTranscript + ) + + try { + const result = await this.mcp.batch(batchPayload) + const created = result.summary?.entitiesCreated ?? 0 + const ids: string[] = [] + for (let i = 0; i < created; i++) { + ids.push(`${session.sessionId}:${i}`) + } + return ids + } catch (e) { + logger.warn(`sandra_batch failed for ${session.sessionId}: ${e}`) + return [] + } + } + + private async pushRawSessionOnly( + containerTag: string, + sessionId: string, + sessionTimestamp: string, + rawTranscript: string + ): Promise { + if (!this.mcp) return + if (!rawTranscript) return + await this.mcp.batch({ + concepts: [], + entities: [ + { + factory: FACTORY_SESSION_RAW, + refs: { + session_id: sessionId, + instance_id: containerTag, + timestamp: sessionTimestamp, + }, + storage: rawTranscript, + }, + ], + triplets: [], + }) + } + + async awaitIndexing( + _result: IngestResult, + containerTag: string, + onProgress?: IndexingProgressCallback + ): Promise { + if (!this.mcp) throw new Error("Provider not initialized") + + onProgress?.({ completedIds: [], failedIds: [], total: 1 }) + // Kick off (or join) the single batch-embed pass for the whole run. 
+ if (this.globalEmbedPromise === null) { + this.globalEmbedPromise = this.runBatchEmbed() + } + try { + const total = await this.globalEmbedPromise + logger.debug( + `sandra_embed_all(lme_fact) total embedded=${total} (container=${containerTag})` + ) + } catch (e) { + logger.warn(`sandra_embed_all failed: ${e}`) + } + onProgress?.({ completedIds: ["embed"], failedIds: [], total: 1 }) + } + + private async runBatchEmbed(): Promise { + if (!this.mcp) throw new Error("Provider not initialized") + // Per-page size must be small enough for a single embed_all call to + // finish within the MCP client timeout. Each embedding takes + // ~200-400ms, so PAGE_LIMIT=200 → page ≤ 80s — comfortably under the + // 10-min client timeout. + const PAGE_LIMIT = Math.max( + 50, + Number(process.env.SANDRA_EMBED_PAGE ?? 200) + ) + let totalEmbedded = 0 + // Embed every factory that the search path might hit via semantic + // search. Without this, new session_raw entities created with + // SANDRA_SKIP_AUTO_EMBED=1 would never get an embedding and the + // fallback retrieval would silently return empty. + const factories = [FACTORY_FACT, FACTORY_ENTITY, FACTORY_SESSION_RAW] + for (const factory of factories) { + let pageIdx = 0 + while (true) { + pageIdx += 1 + const resp = await this.mcp.embedAll({ factory, limit: PAGE_LIMIT }) + const n = resp.embedded ?? 0 + totalEmbedded += n + logger.info( + `sandra_embed_all(${factory}) page ${pageIdx}: embedded=${n}, total=${totalEmbedded}` + ) + if (n === 0) break + } + } + return totalEmbedded + } + + async search(query: string, options: SearchOptions): Promise { + if (!this.mcp) throw new Error("Provider not initialized") + // Ignore memorybench's default limit=10 — we want a wider context window + // for the answerer. Respect an explicit caller-provided limit if set > 10. + const limit = Math.max(options.limit ?? 10, 50) + + // Instance-scoped retrieval strategy: + // + // 1. 
Fetch ALL facts tagged with this instance_id via + // `sandra_search(field=instance_id)`. This is full-recall for the + // question's scope — no cross-instance top-K dilution. + // WITHOUT storage to keep the response small (storage text can be + // several KB per fact × 500 facts → multi-MB response that drops + // the keep-alive connection). We fetch storage in a second pass + // only for the top-N after ranking. + // 2. Also fetch entities tagged with this instance_id. + // 3. Run semantic_search on the main query globally, then use the ranking + // ONLY to promote relevant items from within the instance-scoped pool. + // 4. The final list is instance-scoped facts+entities, reordered so that + // items semantically closest to the query come first. + // + // Why not filter `sandra_semantic_search` results by instance_id? + // Because that operation is a top-K over the global factory; if another + // instance's content is a better semantic match, we lose our own + // candidates before we can look at them. Instance-scoped first, ranked + // after. + // Instance-scoped pool size. Kept moderate so the response payload + // stays under Sandra's HTTP keep-alive comfort zone — going too high + // (tried 600) drops the socket mid-transfer. + const POOL_LIMIT = 300 + + // Extended retrieval now also covers `lme_session_raw` — the verbatim + // transcript fallback. Semantic search on these hits finds details the + // LLM extractor skipped. We also do a scoped search on session_raw to + // guarantee at least a baseline presence in the pool — a global top-K + // over this factory often returns raw sessions from other instances. 
+ const [factPool, entityPool, rawPool, semHits, semEntityHits, semRawHits] = await Promise.all([ + this.mcp + .search({ + query: options.containerTag, + factory: FACTORY_FACT, + field: "instance_id", + limit: POOL_LIMIT, + include_storage: false, // keep response lean + }) + .catch((e) => { + logger.warn(`scoped lme_fact search failed: ${e}`) + return { items: [] as SandraEntityHit[] } + }), + this.mcp + .search({ + query: options.containerTag, + factory: FACTORY_ENTITY, + field: "instance_id", + limit: POOL_LIMIT, + include_storage: false, + }) + .catch((e) => { + logger.warn(`scoped lme_entity search failed: ${e}`) + return { items: [] as SandraEntityHit[] } + }), + // Scoped raw sessions: we need the transcripts (storage) but the + // payload size is the critical constraint — even ~60 sessions of + // 5-15KB each drops the HTTP keep-alive. Fetch just 20, then rely on + // semantic ranking (semRawHits) to surface the most-relevant raw + // sessions for this query. + this.mcp + .search({ + query: options.containerTag, + factory: FACTORY_SESSION_RAW, + field: "instance_id", + limit: 20, + include_storage: true, + }) + .catch((e) => { + logger.warn(`scoped lme_session_raw search failed: ${e}`) + return { items: [] as SandraEntityHit[] } + }), + this.mcp + .semanticSearch({ + query, + factory: FACTORY_FACT, + limit: 200, + threshold: options.threshold ?? 0.0, + include_storage: true, // semantic hits already bring storage + }) + .catch((e) => { + logger.warn(`semantic_search lme_fact failed: ${e}`) + return { results: [] as SandraEntityHit[] } + }), + this.mcp + .semanticSearch({ + query, + factory: FACTORY_ENTITY, + limit: 60, + threshold: options.threshold ?? 0.0, + include_storage: true, + }) + .catch((e) => { + logger.warn(`semantic_search lme_entity failed: ${e}`) + return { results: [] as SandraEntityHit[] } + }), + this.mcp + .semanticSearch({ + query, + factory: FACTORY_SESSION_RAW, + limit: 12, + threshold: options.threshold ?? 
0.0, + include_storage: true, + }) + .catch((e) => { + logger.warn(`semantic_search lme_session_raw failed: ${e}`) + return { results: [] as SandraEntityHit[] } + }), + ]) + + const poolById = new Map() + // Seed pool from the instance-scoped searches (guaranteed instance_id). + for (const r of [factPool, entityPool, rawPool]) { + for (const h of normalizeHits(r)) { + if (typeof h.id !== "number") continue + poolById.set(h.id, h) + } + } + // Merge semantic hits into the pool only when they match instance_id. + // These bring storage (the `statement` field) that the scoped search + // did not fetch. For items already in the pool, upgrade their entry + // with the richer semantic hit (keeps the storage). + for (const r of [semHits, semEntityHits, semRawHits]) { + for (const h of normalizeHits(r)) { + if (typeof h.id !== "number") continue + if (h.refs?.instance_id !== options.containerTag) continue + poolById.set(h.id, h) + } + } + + // Build semantic rank: earlier in the sem list → higher rank. + const semRank = new Map() + let rankCounter = 0 + for (const r of [semRawHits, semHits, semEntityHits]) { + for (const h of normalizeHits(r)) { + if (typeof h.id !== "number") continue + if (!poolById.has(h.id)) continue + if (!semRank.has(h.id)) semRank.set(h.id, rankCounter++) + } + } + + // Split the limit budget: guarantee a fixed slice for raw-session + // transcripts (fallback coverage) and a fixed slice for facts+entities + // (structured extraction). The raw budget is small — a couple of + // transcripts are enough to let the answerer cross-check details the + // extractor missed — but it MUST be non-zero or the whole mechanism + // is invisible. 
+ const RAW_BUDGET = Math.min(8, Math.floor(limit * 0.15) + 2) + + const rawHits: SandraEntityHit[] = [] + const structuredHits: SandraEntityHit[] = [] + for (const h of poolById.values()) { + if (h.factory === FACTORY_SESSION_RAW) rawHits.push(h) + else structuredHits.push(h) + } + const semSort = (a: SandraEntityHit, b: SandraEntityHit) => { + const ra = semRank.has(a.id!) ? semRank.get(a.id!)! : Infinity + const rb = semRank.has(b.id!) ? semRank.get(b.id!)! : Infinity + if (ra !== rb) return ra - rb + const af = a.refs?.predicate ? 0 : 1 + const bf = b.refs?.predicate ? 0 : 1 + return af - bf + } + rawHits.sort(semSort) + structuredHits.sort(semSort) + + // Only keep raw hits that actually carry storage (the transcript). + // Raw pool entries without storage are useless to the answerer. + const rawWithStorage = rawHits.filter((h) => h.storage && h.storage.length > 0) + const rawSlice = rawWithStorage.slice(0, RAW_BUDGET) + const structuredSlice = structuredHits.slice(0, limit - rawSlice.length) + + // Interleave so raw transcripts show up near the top of the prompt — + // the answerer is more likely to cite them when they're not buried. + const ranked = [...rawSlice, ...structuredSlice] + return ranked.map((h) => hitToSearchResult(h)) + } + + async clear(containerTag: string): Promise { + // Sandra's MCP surface does not currently expose a bulk delete-by-ref. + // Benchmark runs isolate via a dedicated database (SANDRA_DB=benchmark_mb). + // We log a warning to make this explicit when the runner calls clear(). + logger.warn( + `Sandra clear() is a no-op; per-containerTag deletion not exposed. Use a dedicated DB for isolation. 
(containerTag=${containerTag})` + ) + } +} + +function buildBatchPayload( + instanceId: string, + sessionId: string, + sessionTimestamp: string, + entities: ExtractedEntity[], + facts: ExtractedFact[], + rawTranscript: string +) { + const sessionEntity = { + factory: FACTORY_SESSION, + refs: { + session_id: sessionId, + instance_id: instanceId, + timestamp: sessionTimestamp, + }, + } + + const sessionRawEntity = { + factory: FACTORY_SESSION_RAW, + refs: { + session_id: sessionId, + instance_id: instanceId, + timestamp: sessionTimestamp, + }, + storage: rawTranscript, + } + + const entityDefs = entities.map((e) => ({ + factory: FACTORY_ENTITY, + refs: { + name: e.name, + kind: e.kind || "other", + instance_id: instanceId, + session_id: sessionId, + turn_idx: "0", + notes: (e.notes || "").slice(0, 240), + }, + })) + + const factDefs = facts.map((f) => { + const refs: Record = { + predicate: f.predicate, + subject: f.subject || "user", + object: f.object || "", + value: (f.value || "").slice(0, 240), + event_date: f.event_date || "", + source: f.source || "user", + instance_id: instanceId, + session_id: sessionId, + turn_idx: String(f.turn_idx ?? 
0), + session_timestamp: sessionTimestamp, + } + if (f.typed_refs) { + for (const [k, v] of Object.entries(f.typed_refs)) { + refs[k] = String(v) + } + } + return { + factory: FACTORY_FACT, + refs, + storage: f.statement, + } + }) + + const nameToIdx = new Map() + entities.forEach((e, i) => { + if (!nameToIdx.has(e.name)) nameToIdx.set(e.name, i + 1) // +1 because session is at 0 + }) + const factStart = 1 + entityDefs.length + + const triplets: Array<{ + subject: string + verb: string + target: string + }> = [] + + for (let i = 0; i < entityDefs.length; i++) { + triplets.push({ subject: "$entity.0", verb: VERB_MENTIONS, target: `$entity.${i + 1}` }) + } + for (let j = 0; j < facts.length; j++) { + const factIdx = factStart + j + triplets.push({ subject: "$entity.0", verb: VERB_STATES, target: `$entity.${factIdx}` }) + const objName = facts[j].object + if (objName && nameToIdx.has(objName)) { + triplets.push({ + subject: `$entity.${factIdx}`, + verb: VERB_ABOUT, + target: `$entity.${nameToIdx.get(objName)!}`, + }) + } + } + + // sessionRawEntity is appended last; it does not participate in any + // triplet so it doesn't affect the $entity.N indices for facts above. + return { + concepts: [VERB_MENTIONS, VERB_STATES, VERB_ABOUT], + entities: [sessionEntity, ...entityDefs, ...factDefs, sessionRawEntity], + triplets, + } +} + +/** + * Build a plain-text transcript of a UnifiedSession for storage in the + * `lme_session_raw` factory. Format: one line per turn, prefixed with a + * short role tag. A header captures the timestamp so the answerer can + * resolve relative dates without extra lookups. + */ +function renderSessionTranscript(session: UnifiedSession, sessionTimestamp: string): string { + const lines: string[] = [] + if (sessionTimestamp) lines.push(`[session on ${sessionTimestamp}]`) + lines.push(`[session_id ${session.sessionId}]`) + for (let i = 0; i < session.messages.length; i++) { + const m = session.messages[i] + const tag = m.role === "user" ? 
"USER" : "ASSISTANT" + lines.push(`[turn ${i} ${tag}] ${m.content}`) + } + return lines.join("\n") +} + +function normalizeHits( + resp: unknown +): SandraEntityHit[] { + if (Array.isArray(resp)) return resp as SandraEntityHit[] + if (resp && typeof resp === "object") { + // Sandra's tools use a few different envelope shapes: + // sandra_semantic_search → {results: [...]} + // sandra_search → {items: [...]} + // sandra_list_entities → {entities: [...]} + // We accept all three so callers don't need to care. + const r = resp as Record + if (Array.isArray(r.results)) return r.results as SandraEntityHit[] + if (Array.isArray(r.items)) return r.items as SandraEntityHit[] + if (Array.isArray(r.entities)) return r.entities as SandraEntityHit[] + } + return [] +} + +function hitToSearchResult(h: SandraEntityHit): SandraSearchHit { + const refs = h.refs || {} + const isEntity = h.factory === FACTORY_ENTITY + const isRaw = h.factory === FACTORY_SESSION_RAW + let statement: string + let predicate: string | undefined + if (isRaw) { + // Session raw: storage holds the full transcript. Cap it so a single + // massive session doesn't crowd out other facts from the answer prompt. + const raw = h.storage || "" + statement = raw.length > 3000 ? raw.slice(0, 3000) + " […transcript truncated]" : raw + predicate = "raw_session_transcript" + } else if (isEntity) { + // Entities: synthesize a descriptor so the answer prompt treats entity + // and fact hits uniformly. + statement = [refs.name, refs.kind ? `(${refs.kind})` : "", refs.notes ? 
`: ${refs.notes}` : ""] + .filter(Boolean) + .join(" ") + .trim() + predicate = `entity:${refs.kind || "other"}` + } else { + statement = h.storage || refs.value || "" + predicate = refs.predicate + } + return { + predicate, + statement, + value: refs.value || refs.name, + event_date: refs.event_date, + session_timestamp: refs.session_timestamp || refs.timestamp, + source: refs.source, + turn_idx: refs.turn_idx, + session_id: refs.session_id, + similarity: h.similarity ?? h.score, + } +} + +/** + * Extract significant content words from a natural-language query, for use + * with sandra_search's LIKE-based matching. Strategy: strip punctuation, + * drop stopwords (question words, articles, common prepositions, pronouns, + * short tokens), cap at 3 to keep the parallel request count bounded. + * + * Returns an empty array if no keyword qualifies (rare) — in that case the + * caller just falls back to semantic search only. + */ +function extractKeywords(query: string): string[] { + const STOPWORDS = new Set([ + // question words + "what", "when", "where", "who", "whom", "why", "how", "which", "whose", + // auxiliaries / copulas + "is", "are", "was", "were", "be", "been", "being", "am", + "do", "does", "did", "have", "has", "had", "can", "could", "will", "would", + "should", "may", "might", "must", "shall", + // articles & determiners + "the", "a", "an", "this", "that", "these", "those", "some", "any", "every", + // prepositions + "in", "of", "to", "for", "on", "at", "by", "with", "from", "about", + "into", "over", "under", "between", "through", + // pronouns + "i", "me", "my", "mine", "you", "your", "yours", "he", "him", "his", + "she", "her", "hers", "it", "its", "we", "us", "our", "ours", + "they", "them", "their", "theirs", + // common verbs / fillers + "also", "just", "very", "really", "much", "many", "more", "most", + "like", "want", "need", "tell", "say", "said", + // conjunctions + "and", "or", "but", "so", "if", "then", "because", "than", + // other + "not", 
"no", "yes", "now", "here", "there", "ever", "never", + "please", "mine", "ours", + ]) + const tokens = query + .toLowerCase() + .replace(/[^a-z0-9\s]/g, " ") + .split(/\s+/) + .filter((w) => w.length >= 4 && !STOPWORDS.has(w)) + // Deduplicate while preserving order. + const seen = new Set() + const unique: string[] = [] + for (const w of tokens) { + if (!seen.has(w)) { + seen.add(w) + unique.push(w) + } + } + return unique.slice(0, 3) +} + +export default SandraProvider diff --git a/src/providers/sandra/mcp-client.ts b/src/providers/sandra/mcp-client.ts new file mode 100644 index 0000000..f091875 --- /dev/null +++ b/src/providers/sandra/mcp-client.ts @@ -0,0 +1,264 @@ +/** + * JSON-RPC 2.0 client for Sandra's MCP HTTP server. Ports the behavior of + * `sandra/benchmark/longmemeval/src/sandra_mcp_client.py` so both benchmarks + * share the exact same wire-level contract. + * + * Session lifecycle: first tool call lazily sends `initialize`, then the + * `notifications/initialized` fire-and-forget, carrying the `Mcp-Session-Id` + * returned by the server on subsequent requests. The server may respond in + * either `application/json` or `text/event-stream`; we extract the first + * `data:` line when streamed. 
+ */ + +import { logger } from "../../utils/logger" + +export interface SandraConfig { + url: string + token?: string + timeoutMs?: number +} + +export class SandraMCPError extends Error {} + +interface JsonRpcEnvelope { + jsonrpc: "2.0" + id?: string + method?: string + params?: unknown + result?: { content?: Array<{ type: string; text: string }> } & Record + error?: { code: number; message: string; data?: unknown } +} + +export class SandraMCPClient { + private readonly url: string + private readonly token: string | undefined + private readonly timeoutMs: number + private sessionId: string | undefined + private initialized = false + private initPromise: Promise | null = null + + constructor(config: SandraConfig) { + this.url = config.url + this.token = config.token + this.timeoutMs = config.timeoutMs ?? 600_000 + } + + private headers(): Record { + const h: Record = { + "Content-Type": "application/json", + Accept: "application/json, text/event-stream", + } + if (this.token) h["Authorization"] = `Bearer ${this.token}` + if (this.sessionId) h["Mcp-Session-Id"] = this.sessionId + return h + } + + private async post(body: unknown): Promise { + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), this.timeoutMs) + try { + return await fetch(this.url, { + method: "POST", + headers: this.headers(), + body: JSON.stringify(body), + signal: controller.signal, + }) + } finally { + clearTimeout(timer) + } + } + + private async ensureInitialized(): Promise { + if (this.initialized) return + if (!this.initPromise) { + this.initPromise = this.doInitialize() + } + await this.initPromise + } + + private async doInitialize(): Promise { + const initPayload = { + jsonrpc: "2.0" as const, + id: `init-${crypto.randomUUID().slice(0, 8)}`, + method: "initialize", + params: { + protocolVersion: "2025-11-25", + capabilities: {}, + clientInfo: { name: "memorybench-sandra", version: "0.1" }, + }, + } + const resp = await this.post(initPayload) + if 
(!resp.ok) { + const text = await resp.text() + throw new SandraMCPError( + `MCP initialize failed: HTTP ${resp.status}: ${text.slice(0, 300)}` + ) + } + const sid = resp.headers.get("Mcp-Session-Id") || resp.headers.get("mcp-session-id") + if (sid) this.sessionId = sid + // drain body so the connection is returned to the pool + await resp.text() + + const notify = { + jsonrpc: "2.0" as const, + method: "notifications/initialized", + params: {}, + } + // fire-and-forget + try { + await this.post(notify) + } catch (e) { + logger.debug(`notifications/initialized post failed: ${e}`) + } + this.initialized = true + } + + async callTool(name: string, args: Record): Promise { + await this.ensureInitialized() + + const payload = { + jsonrpc: "2.0" as const, + id: crypto.randomUUID(), + method: "tools/call", + params: { name, arguments: args }, + } + + // The built-in Bun HTTP server in Sandra occasionally drops concurrent + // keep-alive connections ("socket connection was closed unexpectedly"). + // One retry with exponential backoff handles the spurious drops without + // masking real problems. + const MAX_ATTEMPTS = 3 + let lastErr: unknown + let resp: Response | null = null + for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) { + try { + resp = await this.post(payload) + break + } catch (e) { + lastErr = e + if (attempt === MAX_ATTEMPTS) throw e + const msg = e instanceof Error ? e.message : String(e) + logger.debug( + `MCP ${name} attempt ${attempt}/${MAX_ATTEMPTS} failed: ${msg}. 
Retrying...` + ) + await new Promise((r) => setTimeout(r, 250 * attempt)) + } + } + if (!resp) throw lastErr + + if (!resp.ok) { + const text = await resp.text() + throw new SandraMCPError(`HTTP ${resp.status} from MCP: ${text.slice(0, 500)}`) + } + const raw = (await resp.text()).trim() + + let body = raw + if (body.startsWith("event:") || body.startsWith("data:")) { + for (const line of body.split("\n")) { + if (line.startsWith("data:")) { + body = line.slice(5).trim() + break + } + } + } + + let data: JsonRpcEnvelope + try { + data = JSON.parse(body) as JsonRpcEnvelope + } catch (e) { + throw new SandraMCPError(`Non-JSON body from MCP: ${body.slice(0, 300)}`) + } + if (data.error) { + throw new SandraMCPError( + `MCP error ${data.error.code}: ${data.error.message}` + ) + } + const result = data.result ?? {} + const content = result.content + if (Array.isArray(content) && content.length > 0 && content[0]?.type === "text") { + const text = content[0].text + try { + return JSON.parse(text) as T + } catch { + return text as unknown as T + } + } + return result as unknown as T + } + + // Typed convenience wrappers ------------------------------------------------ + + batch(payload: { + concepts?: string[] + entities?: Array<{ factory: string; refs: Record; storage?: string }> + triplets?: Array<{ + subject: string | number + verb: string | number + target: string | number + refs?: Record + }> + }): Promise { + return this.callTool("sandra_batch", { + concepts: payload.concepts ?? [], + entities: payload.entities ?? [], + triplets: payload.triplets ?? 
[], + }) + } + + semanticSearch(args: { + query: string + factory?: string + limit?: number + threshold?: number + include_storage?: boolean + fields?: string[] + }): Promise<{ results?: SandraEntityHit[] } | SandraEntityHit[]> { + return this.callTool("sandra_semantic_search", args as Record) + } + + search(args: { + query?: string + factory?: string + field?: string + limit?: number + fields?: string[] + include_storage?: boolean + }): Promise<{ results?: SandraEntityHit[] } | SandraEntityHit[]> { + return this.callTool("sandra_search", args as Record) + } + + embedAll(args: { factory?: string; limit?: number }): Promise<{ embedded?: number }> { + return this.callTool("sandra_embed_all", args as Record) + } + + listEntities(args: { + factory: string + limit?: number + offset?: number + fields?: string[] + include_storage?: boolean + }): Promise<{ entities?: SandraEntityHit[] } | SandraEntityHit[]> { + return this.callTool("sandra_list_entities", args as Record) + } +} + +export interface SandraBatchResult { + summary?: { + entitiesCreated?: number + tripletsCreated?: number + conceptsCreated?: number + refsAttached?: number + } + entities?: Array<{ id?: number; factory?: string }> + [key: string]: unknown +} + +export interface SandraEntityHit { + id?: number + factory?: string + refs?: Record + storage?: string + similarity?: number + score?: number + [key: string]: unknown +} diff --git a/src/providers/sandra/prompts.ts b/src/providers/sandra/prompts.ts new file mode 100644 index 0000000..07e4cf5 --- /dev/null +++ b/src/providers/sandra/prompts.ts @@ -0,0 +1,173 @@ +/** + * Prompts for Sandra: + * 1. EXTRACT_SYSTEM_PROMPT — used at ingestion to turn a raw session into + * structured {entities, facts}. Copied VERBATIM from the Python harness + * at `sandra/benchmark/longmemeval/src/ingest_claude.py:EXTRACT_SYSTEM_PROMPT` + * so the extraction quality is identical and the 76% baseline transfers. + * Do not edit without a matching edit on the Python side. 
+ * + * 2. answerPrompt — how memorybench's answering LLM reads Sandra's retrieval + * output. Mirrors the shape mem0 and zep provide so the downstream + * reader treats all three providers comparably. + */ + +import type { ProviderPrompts } from "../../types/prompts" + +export const EXTRACT_SYSTEM_PROMPT = `You extract a structured knowledge-graph from a single chat session so a downstream system can later answer questions about it. + +Output a single JSON object with this exact shape: + +{ + "entities": [ + {"name": "", "kind": "person|place|product|activity|topic|other", "notes": ""} + ], + "facts": [ + { + "predicate": "", + "statement": "", + "subject": "user|assistant", + "source": "user|assistant|synthesis", + "object": "", + "value": "", + "event_date": "", + "turn_idx": , + "typed_refs": {"": , ...} + } + ] +} + +Extraction rules: + +1. **User facts** (source="user"): anything the user asserts — events, preferences, purchases, corrections, emotional states, plans. One fact per atomic claim. Do not merge. + +2. **Assistant facts** (source="assistant"): when the assistant provides structured information that might be referenced later — numbered lists, ranked recommendations, direct answers to factual questions, enumerated items. Put the full list verbatim in \`value\` (enumerated items separated by \\n so "1. Foo\\n2. Bar\\n..." is preserved). Use predicates like "assistant_listed_" or "assistant_recommended_". + +3. **Preference synthesis** (source="synthesis"): if the user reveals preference patterns across multiple turns (likes, dislikes, habits, constraints, goals), emit ONE fact at the end with predicate="preference_profile" whose \`value\` describes the user's preferences as a short rubric (e.g. "prefers healthy high-protein meals with quinoa and roasted vegetables, open to new twists on chicken salads and wraps, avoids high-calorie options"). This is how you answer "suggest me something" style questions. + +4. 
**Temporal absolutes**: ALWAYS resolve relative dates to absolute YYYY-MM-DD using the session timestamp as anchor. "Yesterday" when session is 2023/05/21 (Sun) → "2023-05-20". "Last Thursday" when session is 2023/05/27 (Sat) → "2023-05-25". If a date range is given ("March 9th" this year), use the session year. If truly unknown, leave empty — do NOT leave the relative phrase. + +5. **Knowledge updates**: if the user corrects a previous statement, emit BOTH the old and the new fact — each with its own turn_idx. Downstream picks the most recent. + +6. **Consistency**: use the same entity name consistently. Cite entities by name from the entities list above. + +7. **No hallucination**. If unsure, skip it. Better to miss a fact than invent one. + +8. **Output valid JSON only**. No prose, no markdown code fences. + +9. **Quantified events — ONE FACT PER EVENT, WITH A TYPED NUMERIC REF** (narrow rule, applies ONLY when the source explicitly states a count or measurement about an action): + + The \`value\` string stays as a human-readable label (e.g. "$40 lights", "3 rides", "347 miles YTD"). But you ALSO emit a **\`typed_refs\`** dict where the KEY uses the \`dimension[unit]\` convention and the VALUE is a clean number. This is the authoritative numeric channel that Sandra sums at read time WITHOUT mixing units. + + Convention: snake_case dimension + bracketed unit. 
Use ONLY these keys (case matters — all lowercase): + - \`cost[usd]\`, \`cost[eur]\`, \`cost[gbp]\` — money amounts + - \`distance[miles]\`, \`distance[km]\`, \`distance[m]\` — distances + - \`duration[minutes]\`, \`duration[hours]\`, \`duration[days]\` — durations + - \`count[times]\` — pure integer event counts (rides, attendances, festivals) + - \`mass[kg]\`, \`mass[lbs]\` — weights + - \`temperature[c]\`, \`temperature[f]\` — temperatures + + Examples: + - "I bought a Bell Zephyr helmet for $120" → \`{"predicate": "bought_helmet", "value": "$120 Bell Zephyr", "typed_refs": {"cost[usd]": 120}}\` + - "I rode the Mako, Kraken, and Manta rollercoasters at SeaWorld" → \`{"predicate": "rode_rollercoasters_at_seaworld", "value": "3 rides: Mako, Kraken, Manta", "typed_refs": {"count[times]": 3}}\` + - "I rode Space Mountain three times at Disneyland" → \`{"predicate": "rode_rollercoaster_at_disneyland", "value": "3 rides on Space Mountain", "typed_refs": {"count[times]": 3}}\` + - "I attended the Portland Film Festival" → \`{"predicate": "attended_film_festival_portland", "value": "Portland Film Festival", "typed_refs": {"count[times]": 1}}\` + - "I've clocked 347 miles since the start of the year" → \`{"predicate": "tracked_bike_mileage", "value": "347 miles YTD", "typed_refs": {"distance[miles]": 347}}\` + - "I ran a 5K in 35 minutes" → \`{"predicate": "ran_5k", "value": "35 min finish", "typed_refs": {"duration[minutes]": 35, "distance[km]": 5}}\` + - "My goal is 1000 miles by summer" → \`{"predicate": "goal_bike_mileage", "value": "1000 miles by summer", "typed_refs": {}}\` # GOAL, not actual — do NOT add typed_ref (would be summed wrongly) + - "I'm due to lubricate my chain on May 15th" → \`{"predicate": "due_lubricate_chain", "value": "May 15", "event_date": "2023-05-15", "typed_refs": {}}\` # date-only + - "I prefer quinoa over rice" → \`{"predicate": "prefers_quinoa", "value": "quinoa over rice", "typed_refs": {}}\` # qualitative + + Rules: + - DO NOT 
/**
 * Build the answer-generation prompt from Sandra's retrieval output.
 *
 * @param question    the benchmark question being answered
 * @param context     retrieved facts (treated as SandraSearchHit[])
 * @param questionDate optional "asked on" date, injected into the prompt
 * @returns a single prompt string listing the (re-ordered) facts and the question
 *
 * NOTE(review): the prompt template below contains two empty inline-code
 * spans ("\`\`") where a placeholder such as <predicate> / <raw_session>
 * appears to have been lost in transit — confirm against the original
 * template before shipping.
 */
export function buildSandraAnswerPrompt(
  question: string,
  context: unknown[],
  questionDate?: string
): string {
  const hits = context as SandraSearchHit[]
  // Sort hits so facts with explicit event_date come first (useful for
  // temporal questions), then by session_timestamp descending (most recent
  // first, useful for knowledge-update questions where the latest fact wins).
  // This is a stable reorder — retrieval still decides what's in the list.
  const sorted = [...hits].sort((a, b) => {
    const aHasDate = a.event_date ? 1 : 0
    const bHasDate = b.event_date ? 1 : 0
    if (aHasDate !== bHasDate) return bHasDate - aHasDate
    // Timestamps compared lexicographically — assumes sortable (ISO-like)
    // session_timestamp strings; TODO confirm upstream format.
    const aTs = a.session_timestamp || ""
    const bTs = b.session_timestamp || ""
    return bTs.localeCompare(aTs)
  })
  // Render each hit as "[N][event: ...][session: ...](source)<predicate> text",
  // omitting any tag whose field is absent. Falls back to the raw JSON of the
  // hit when neither statement nor value is present.
  const rendered = sorted
    .map((h, i) => {
      const dateTag = h.event_date ? ` [event: ${h.event_date}]` : ""
      const sessTag = h.session_timestamp ? ` [session: ${h.session_timestamp}]` : ""
      const srcTag = h.source ? ` (${h.source})` : ""
      const predTag = h.predicate ? ` <${h.predicate}>` : ""
      const head = h.statement || h.value || JSON.stringify(h)
      return `[${i + 1}]${dateTag}${sessTag}${srcTag}${predTag} ${head}`
    })
    .join("\n\n")

  const dateLine = questionDate ? `\nQuestion asked on: ${questionDate}\n` : ""
  // Prompt style mirrors mem0's buildMem0AnswerPrompt so Sandra is evaluated
  // under the same answer-generation constraints. The only adaptations are
  // (a) telling the model the entries are structured facts with timestamps
  // (Sandra's retrieval shape), and (b) asking it to synthesize across facts
  // when the question calls for a recommendation or preference answer.
  return `You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.

Key instructions:
- Analyze the facts with their timestamps carefully.
- **For knowledge-update questions** (e.g. "current X", "latest Y"): prioritize the most recent fact (facts are sorted most-recent first). Earlier contradicting facts are stale.
- **For enumeration / counting questions** (e.g. "how many", "list all"): count distinct events across ALL retrieved facts. Do not stop after finding one.
- **For temporal comparison** (e.g. "which first, X or Y"): compare event_date or session_timestamp on both items. Answer with the one whose date is earlier.
- **For preference / suggestion questions**: synthesize a recommendation from the user's prior behavior shown in the facts.
- Convert relative time references ("last year", "two months ago") into absolute dates using the session timestamps.
- Look for direct evidence in the facts; do not invent.
- Don't confuse character names with actual users.

Fact entry format:
- \`[N]\` index of the fact
- \`[event: YYYY-MM-DD]\` when the event happened, if known
- \`[session: ... timestamp]\` when this fact was recorded in conversation
- \`(user|assistant|synthesis)\` source of the fact
- \`\` short semantic predicate
- Then the fact statement

Special entries: facts with predicate \`\` are
**verbatim conversation dumps** from a past session. They contain details
that may not appear as structured facts elsewhere in the list. When
structured facts are silent on a question, read the raw transcripts
carefully — the answer often lives there as literal quoted text.
${dateLine}
Facts:
${rendered}

Question: ${question}

Answer concisely and directly.`
}
+export const FACTORY_SESSION_RAW = "lme_session_raw" + +export const VERB_MENTIONS = "lme_mentions" +export const VERB_STATES = "lme_states" +export const VERB_ABOUT = "lme_about" + +export const SESSION_FIELDS = ["session_id", "instance_id", "timestamp"] as const + +export interface ExtractedEntity { + name: string + kind: string + notes?: string +} + +export interface ExtractedFact { + predicate: string + statement: string + subject?: string + source?: "user" | "assistant" | "synthesis" + object?: string + value?: string + event_date?: string + turn_idx?: number + typed_refs?: Record +} + +export interface SessionExtraction { + entities: ExtractedEntity[] + facts: ExtractedFact[] +} diff --git a/src/types/provider.ts b/src/types/provider.ts index cdc0228..e2ded5d 100644 --- a/src/types/provider.ts +++ b/src/types/provider.ts @@ -47,4 +47,4 @@ export interface Provider { clear(containerTag: string): Promise } -export type ProviderName = "supermemory" | "mem0" | "zep" | "filesystem" | "rag" +export type ProviderName = "supermemory" | "mem0" | "zep" | "filesystem" | "rag" | "sandra" diff --git a/src/utils/config.ts b/src/utils/config.ts index 8ac1268..fd902c1 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -6,6 +6,8 @@ export interface Config { openaiApiKey: string anthropicApiKey: string googleApiKey: string + sandraUrl: string + sandraToken: string } export const config: Config = { @@ -16,9 +18,15 @@ export const config: Config = { openaiApiKey: process.env.OPENAI_API_KEY || "", anthropicApiKey: process.env.ANTHROPIC_API_KEY || "", googleApiKey: process.env.GOOGLE_API_KEY || "", + sandraUrl: process.env.SANDRA_URL || "http://localhost:8090/mcp", + sandraToken: process.env.SANDRA_TOKEN || "", } -export function getProviderConfig(provider: string): { apiKey: string; baseUrl?: string } { +export function getProviderConfig(provider: string): { + apiKey: string + baseUrl?: string + token?: string +} { switch (provider) { case "supermemory": return { 
apiKey: config.supermemoryApiKey, baseUrl: config.supermemoryBaseUrl } @@ -30,6 +38,14 @@ export function getProviderConfig(provider: string): { apiKey: string; baseUrl?: return { apiKey: config.openaiApiKey } // Filesystem uses OpenAI for memory extraction case "rag": return { apiKey: config.openaiApiKey } // RAG provider uses OpenAI for embeddings + case "sandra": + // Sandra's API key slot carries ANTHROPIC_API_KEY (used by the + // ingestion extractor). baseUrl + token address the MCP HTTP server. + return { + apiKey: config.anthropicApiKey, + baseUrl: config.sandraUrl, + token: config.sandraToken, + } default: throw new Error(`Unknown provider: ${provider}`) }