diff --git a/src/cli/index.ts b/src/cli/index.ts index b3c29d6..f86751f 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -77,12 +77,19 @@ Available providers for storing and retrieving memories: Extracts memories via LLM, chunks + embeds extracted content, hybrid BM25 + vector search. Requires: OPENAI_API_KEY (for memory extraction via gpt-4o-mini + embeddings) + sandra Sandra - Semantic graph database with typed refs and entity factories. + Extracts entities + facts per session via Claude, pushes one sandra_batch per session, + retrieves via semantic search on the lme_fact factory scoped by instance_id. + Requires: ANTHROPIC_API_KEY (extractor) + SANDRA_URL (MCP HTTP server, default http://localhost:8090/mcp) + Optional: SANDRA_TOKEN (bearer token for auth), SANDRA_EXTRACTOR_MODEL (default claude-haiku-4-5-20251001) + Usage: -p supermemory Use Supermemory as the memory provider -p mem0 Use Mem0 as the memory provider -p zep Use Zep as the memory provider -p filesystem Use file-based memory (CLAUDE.md style) -p rag Use hybrid RAG memory (OpenClaw/QMD style) + -p sandra Use Sandra semantic graph database `) } diff --git a/src/providers/index.ts b/src/providers/index.ts index 5f71566..c33c74f 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -5,6 +5,7 @@ import { Mem0Provider } from "./mem0" import { ZepProvider } from "./zep" import { FilesystemProvider } from "./filesystem" import { RAGProvider } from "./rag" +import { SandraProvider } from "./sandra" const providers: Record Provider> = { supermemory: SupermemoryProvider, @@ -12,6 +13,7 @@ const providers: Record Provider> = { zep: ZepProvider, filesystem: FilesystemProvider, rag: RAGProvider, + sandra: SandraProvider, } export function createProvider(name: ProviderName): Provider { @@ -39,4 +41,4 @@ export function getProviderInfo(name: ProviderName): { } } -export { SupermemoryProvider, Mem0Provider, ZepProvider, FilesystemProvider, RAGProvider } +export { SupermemoryProvider, 
Mem0Provider, ZepProvider, FilesystemProvider, RAGProvider, SandraProvider } diff --git a/src/providers/sandra/extractor.ts b/src/providers/sandra/extractor.ts new file mode 100644 index 0000000..64f65cc --- /dev/null +++ b/src/providers/sandra/extractor.ts @@ -0,0 +1,207 @@ +/** + * LLM-based session extractor. Calls Claude Haiku via the Vercel AI SDK + * (already a memorybench dependency) to convert raw chat turns into the + * {entities, facts} JSON that the Sandra graph writer consumes. + * + * Mirrors `sandra/benchmark/longmemeval/src/ingest_claude.py`: + * - one API call per session + * - parse JSON from the response, stripping markdown fences if present + * - one retry with a stricter instruction on JSONDecodeError + * + * Model default is `claude-haiku-4-5-20251001` to match the Python default. + * Override via the `SANDRA_EXTRACTOR_MODEL` env var if needed. + */ + +import { generateText, type LanguageModel } from "ai" +import { createAnthropic } from "@ai-sdk/anthropic" +import { createOpenAI } from "@ai-sdk/openai" +import type { UnifiedMessage } from "../../types/unified" +import { logger } from "../../utils/logger" +import { EXTRACT_SYSTEM_PROMPT } from "./prompts" +import type { SessionExtraction } from "./schema" + +const DEFAULT_ANTHROPIC_MODEL = "claude-haiku-4-5-20251001" +const DEFAULT_OPENAI_MODEL = "gpt-4o-mini" +const MAX_OUTPUT_TOKENS = 8000 + +export interface ExtractorConfig { + apiKey: string // Anthropic key when present; falls through to OPENAI_API_KEY if empty. + model?: string +} + +export interface ExtractorStats { + inputTokens: number + outputTokens: number + apiCalls: number + parseFailures: number +} + +/** + * Dual-provider extractor that mirrors the fallback logic of the Python + * benchmark's `llm_client.py`: prefer Anthropic (Claude Haiku) when an API + * key is configured, otherwise fall back to OpenAI (gpt-4o-mini). The model + * name can be overridden via the `SANDRA_EXTRACTOR_MODEL` env var. 
+ */ +export class SessionExtractor { + private readonly model: LanguageModel + readonly modelName: string + readonly providerName: "anthropic" | "openai" + readonly stats: ExtractorStats = { + inputTokens: 0, + outputTokens: 0, + apiCalls: 0, + parseFailures: 0, + } + + constructor(config: ExtractorConfig) { + const override = config.model || process.env.SANDRA_EXTRACTOR_MODEL + const anthropicKey = config.apiKey || process.env.ANTHROPIC_API_KEY || "" + const openaiKey = process.env.OPENAI_API_KEY || "" + + if (anthropicKey) { + const provider = createAnthropic({ apiKey: anthropicKey }) + this.modelName = override || DEFAULT_ANTHROPIC_MODEL + this.model = provider(this.modelName) + this.providerName = "anthropic" + } else if (openaiKey) { + const provider = createOpenAI({ apiKey: openaiKey }) + this.modelName = override || DEFAULT_OPENAI_MODEL + this.model = provider(this.modelName) + this.providerName = "openai" + } else { + throw new Error( + "Sandra extractor needs an LLM key: set ANTHROPIC_API_KEY (Claude Haiku) or OPENAI_API_KEY (gpt-4o-mini)." + ) + } + logger.info( + `Sandra extractor using ${this.providerName} / ${this.modelName}` + ) + } + + async extract( + messages: UnifiedMessage[], + sessionTimestamp?: string + ): Promise { + const prompt = this.buildSessionPrompt(messages, sessionTimestamp) + const first = await this.callModel(EXTRACT_SYSTEM_PROMPT, prompt) + + const parsed = tryParseJson(first) + if (parsed) return sanitize(parsed) + + this.stats.parseFailures += 1 + logger.debug("Extractor got non-JSON response; retrying once") + + const second = await this.callModel( + EXTRACT_SYSTEM_PROMPT + + "\nCRITICAL: Output valid JSON only. No prose, no markdown fences.", + prompt + "\n\nPrevious output was not valid JSON. Re-emit valid JSON only." 
+ ) + const retried = tryParseJson(second) + if (retried) return sanitize(retried) + + logger.warn("Extractor failed twice; returning empty extraction") + return { entities: [], facts: [] } + } + + private buildSessionPrompt(messages: UnifiedMessage[], ts?: string): string { + const header = ts ? `Session timestamp: ${ts}\n\n` : "" + const lines = messages.map((m, i) => `[turn ${i} ${m.role}]\n${m.content}`) + return header + lines.join("\n\n") + } + + private async callModel(system: string, prompt: string): Promise { + const result = await generateText({ + model: this.model, + system, + prompt, + maxOutputTokens: MAX_OUTPUT_TOKENS, + }) + this.stats.apiCalls += 1 + const usage = result.usage + if (usage) { + this.stats.inputTokens += usage.inputTokens ?? 0 + this.stats.outputTokens += usage.outputTokens ?? 0 + } + return (result.text ?? "").trim() + } +} + +function tryParseJson(text: string): SessionExtraction | null { + let body = text.trim() + if (body.startsWith("```")) { + // strip leading fence incl. optional language tag + body = body.replace(/^```[a-zA-Z]*\n?/, "") + if (body.endsWith("```")) body = body.slice(0, -3) + body = body.trim() + } + if (!body.startsWith("{")) { + // some models prefix a single explanatory line; grab the first {...} block + const match = body.match(/\{[\s\S]*\}/) + if (!match) return null + body = match[0] + } + try { + const obj = JSON.parse(body) + if (!obj || typeof obj !== "object") return null + const entities = Array.isArray(obj.entities) ? obj.entities : [] + const facts = Array.isArray(obj.facts) ? obj.facts : [] + return { entities, facts } + } catch { + return null + } +} + +function sanitize(raw: SessionExtraction): SessionExtraction { + const entities = raw.entities + .filter((e) => e && typeof e.name === "string" && e.name.trim() !== "") + .map((e) => ({ + name: String(e.name).trim(), + kind: (e.kind && String(e.kind)) || "other", + notes: e.notes ? 
String(e.notes).slice(0, 240) : "", + })) + + const facts = raw.facts + .filter( + (f) => + f && + typeof f.predicate === "string" && + f.predicate.trim() !== "" && + typeof f.statement === "string" && + f.statement.trim() !== "" + ) + .map((f) => { + const typedRefs: Record = {} + const rawTyped = (f as { typed_refs?: unknown }).typed_refs + if (rawTyped && typeof rawTyped === "object") { + for (const [k, v] of Object.entries(rawTyped as Record)) { + const num = Number(v) + if (!Number.isNaN(num) && Number.isFinite(num) && k.trim() !== "") { + typedRefs[k.trim()] = num + } + } + } + let turnIdx = 0 + const rawTurn = (f as { turn_idx?: unknown }).turn_idx + if (typeof rawTurn === "number") turnIdx = Math.floor(rawTurn) + else if (typeof rawTurn === "string") { + const n = parseInt(rawTurn, 10) + if (!Number.isNaN(n)) turnIdx = n + } + return { + predicate: String(f.predicate).trim(), + statement: String(f.statement), + subject: (f.subject && String(f.subject)) || "user", + source: ((f.source && String(f.source)) || "user") as + | "user" + | "assistant" + | "synthesis", + object: (f.object && String(f.object)) || "", + value: (f.value && String(f.value)) || "", + event_date: (f.event_date && String(f.event_date)) || "", + turn_idx: turnIdx, + typed_refs: typedRefs, + } + }) + + return { entities, facts } +} diff --git a/src/providers/sandra/index.ts b/src/providers/sandra/index.ts new file mode 100644 index 0000000..ec07299 --- /dev/null +++ b/src/providers/sandra/index.ts @@ -0,0 +1,645 @@ +/** + * SandraProvider — memorybench adapter for the Sandra semantic graph database. + * + * Design notes: + * - Ingestion mirrors the Python harness's two-stage pipeline: LLM-extract + * entities + facts from each session, then push one sandra_batch per + * session. The extracted `{instance_id=containerTag}` ref is how + * per-question memory scoping is enforced at retrieval time. 
+ * - Retrieval is single-shot: sandra_semantic_search on lme_fact storage, + * client-filter by instance_id == containerTag, return top-N. No + * multi-turn tool-use agent — that would be unfair vs Mem0/Zep, which + * return static search results. This is Sandra's out-of-the-box behavior. + * - clear() is a best-effort no-op like Supermemory's — test runs use a + * dedicated DB (SANDRA_DB=benchmark_mb) so physical isolation comes from + * the database, not per-containerTag deletion. + */ + +import type { + Provider, + ProviderConfig, + IngestOptions, + IngestResult, + SearchOptions, + IndexingProgressCallback, +} from "../../types/provider" +import type { UnifiedSession } from "../../types/unified" +import { logger } from "../../utils/logger" +import { SandraMCPClient, type SandraEntityHit } from "./mcp-client" +import { SessionExtractor } from "./extractor" +import { + FACTORY_ENTITY, + FACTORY_FACT, + FACTORY_SESSION, + FACTORY_SESSION_RAW, + VERB_ABOUT, + VERB_MENTIONS, + VERB_STATES, + type ExtractedEntity, + type ExtractedFact, +} from "./schema" +import { SANDRA_PROMPTS, type SandraSearchHit } from "./prompts" + +const DEFAULT_SANDRA_URL = process.env.SANDRA_URL || "http://localhost:8090/mcp" +const DEFAULT_SANDRA_TOKEN = process.env.SANDRA_TOKEN || "" + +export class SandraProvider implements Provider { + name = "sandra" + prompts = SANDRA_PROMPTS + concurrency = { + default: 4, + ingest: 4, + indexing: 1, + // Sandra's MCP HTTP server is a single-threaded PHP process; bursty + // parallel search requests can drop keep-alive connections. Keep search + // concurrency modest — the MCP client also retries on socket errors. + search: 2, + } + + private mcp: SandraMCPClient | null = null + private extractor: SessionExtractor | null = null + // One global batch-embed covers all containerTags because the factory-level + // embed_all walks every unembedded `lme_fact` entity, regardless of + // instance_id. 
We run it once per provider lifetime; subsequent + // awaitIndexing() calls piggy-back on the same promise. + private globalEmbedPromise: Promise | null = null + + async initialize(config: ProviderConfig): Promise { + const baseUrl = (config.baseUrl as string | undefined) || DEFAULT_SANDRA_URL + const token = (config.token as string | undefined) || DEFAULT_SANDRA_TOKEN || undefined + this.mcp = new SandraMCPClient({ url: baseUrl, token }) + this.extractor = new SessionExtractor({ apiKey: config.apiKey }) + logger.info(`Initialized Sandra provider (url=${baseUrl})`) + } + + async ingest( + sessions: UnifiedSession[], + options: IngestOptions + ): Promise { + if (!this.mcp || !this.extractor) throw new Error("Provider not initialized") + + const documentIds: string[] = [] + const INNER_CONCURRENCY = Math.max(1, Number(process.env.SANDRA_SESSION_PARALLELISM ?? 6)) + + // Run sessions in parallel chunks. A single LongMemEval question can have + // 40+ sessions; serial ingestion means Sandra spends most of its time + // idle. Cap with SANDRA_SESSION_PARALLELISM (default 6) to avoid pounding + // the LLM extractor or the MCP server. + for (let i = 0; i < sessions.length; i += INNER_CONCURRENCY) { + const batch = sessions.slice(i, i + INNER_CONCURRENCY) + const results = await Promise.all(batch.map((s) => this.ingestOneSession(options.containerTag, s))) + for (const ids of results) documentIds.push(...ids) + } + + return { documentIds } + } + + private async ingestOneSession( + containerTag: string, + session: UnifiedSession + ): Promise { + if (!this.mcp || !this.extractor) throw new Error("Provider not initialized") + const formattedDate = (session.metadata?.formattedDate as string) || "" + const isoDate = (session.metadata?.date as string) || "" + const sessionTimestamp = formattedDate || isoDate + + // Always dump the raw session transcript as an lme_session_raw entity, + // even if the structured extraction fails later. 
The raw storage acts + // as a verbatim fallback that semantic search can hit when the + // extracted fact graph misses a detail. + const rawTranscript = renderSessionTranscript(session, sessionTimestamp) + + let extracted + try { + extracted = await this.extractor.extract(session.messages, sessionTimestamp) + } catch (e) { + logger.warn(`Extraction failed for ${session.sessionId}: ${e}`) + // Still push the raw transcript so retrieval at least has something. + await this.pushRawSessionOnly(containerTag, session.sessionId, sessionTimestamp, rawTranscript).catch(() => undefined) + return [] + } + + const batchPayload = buildBatchPayload( + containerTag, + session.sessionId, + sessionTimestamp, + extracted.entities, + extracted.facts, + rawTranscript + ) + + try { + const result = await this.mcp.batch(batchPayload) + const created = result.summary?.entitiesCreated ?? 0 + const ids: string[] = [] + for (let i = 0; i < created; i++) { + ids.push(`${session.sessionId}:${i}`) + } + return ids + } catch (e) { + logger.warn(`sandra_batch failed for ${session.sessionId}: ${e}`) + return [] + } + } + + private async pushRawSessionOnly( + containerTag: string, + sessionId: string, + sessionTimestamp: string, + rawTranscript: string + ): Promise { + if (!this.mcp) return + if (!rawTranscript) return + await this.mcp.batch({ + concepts: [], + entities: [ + { + factory: FACTORY_SESSION_RAW, + refs: { + session_id: sessionId, + instance_id: containerTag, + timestamp: sessionTimestamp, + }, + storage: rawTranscript, + }, + ], + triplets: [], + }) + } + + async awaitIndexing( + _result: IngestResult, + containerTag: string, + onProgress?: IndexingProgressCallback + ): Promise { + if (!this.mcp) throw new Error("Provider not initialized") + + onProgress?.({ completedIds: [], failedIds: [], total: 1 }) + // Kick off (or join) the single batch-embed pass for the whole run. 
+ if (this.globalEmbedPromise === null) { + this.globalEmbedPromise = this.runBatchEmbed() + } + try { + const total = await this.globalEmbedPromise + logger.debug( + `sandra_embed_all(lme_fact) total embedded=${total} (container=${containerTag})` + ) + } catch (e) { + logger.warn(`sandra_embed_all failed: ${e}`) + } + onProgress?.({ completedIds: ["embed"], failedIds: [], total: 1 }) + } + + private async runBatchEmbed(): Promise { + if (!this.mcp) throw new Error("Provider not initialized") + // Per-page size must be small enough for a single embed_all call to + // finish within the MCP client timeout. Each embedding takes + // ~200-400ms, so PAGE_LIMIT=200 → page ≤ 80s — comfortably under the + // 10-min client timeout. + const PAGE_LIMIT = Math.max( + 50, + Number(process.env.SANDRA_EMBED_PAGE ?? 200) + ) + let totalEmbedded = 0 + // Embed every factory that the search path might hit via semantic + // search. Without this, new session_raw entities created with + // SANDRA_SKIP_AUTO_EMBED=1 would never get an embedding and the + // fallback retrieval would silently return empty. + const factories = [FACTORY_FACT, FACTORY_ENTITY, FACTORY_SESSION_RAW] + for (const factory of factories) { + let pageIdx = 0 + while (true) { + pageIdx += 1 + const resp = await this.mcp.embedAll({ factory, limit: PAGE_LIMIT }) + const n = resp.embedded ?? 0 + totalEmbedded += n + logger.info( + `sandra_embed_all(${factory}) page ${pageIdx}: embedded=${n}, total=${totalEmbedded}` + ) + if (n === 0) break + } + } + return totalEmbedded + } + + async search(query: string, options: SearchOptions): Promise { + if (!this.mcp) throw new Error("Provider not initialized") + // Ignore memorybench's default limit=10 — we want a wider context window + // for the answerer. Respect an explicit caller-provided limit if set > 10. + const limit = Math.max(options.limit ?? 10, 50) + + // Instance-scoped retrieval strategy: + // + // 1. 
Fetch ALL facts tagged with this instance_id via + // `sandra_search(field=instance_id)`. This is full-recall for the + // question's scope — no cross-instance top-K dilution. + // WITHOUT storage to keep the response small (storage text can be + // several KB per fact × 500 facts → multi-MB response that drops + // the keep-alive connection). We fetch storage in a second pass + // only for the top-N after ranking. + // 2. Also fetch entities tagged with this instance_id. + // 3. Run semantic_search on the main query globally, then use the ranking + // ONLY to promote relevant items from within the instance-scoped pool. + // 4. The final list is instance-scoped facts+entities, reordered so that + // items semantically closest to the query come first. + // + // Why not filter `sandra_semantic_search` results by instance_id? + // Because that operation is a top-K over the global factory; if another + // instance's content is a better semantic match, we lose our own + // candidates before we can look at them. Instance-scoped first, ranked + // after. + // Instance-scoped pool size. Kept moderate so the response payload + // stays under Sandra's HTTP keep-alive comfort zone — going too high + // (tried 600) drops the socket mid-transfer. + const POOL_LIMIT = 300 + + // Extended retrieval now also covers `lme_session_raw` — the verbatim + // transcript fallback. Semantic search on these hits finds details the + // LLM extractor skipped. We also do a scoped search on session_raw to + // guarantee at least a baseline presence in the pool — a global top-K + // over this factory often returns raw sessions from other instances. 
+ const [factPool, entityPool, rawPool, semHits, semEntityHits, semRawHits] = await Promise.all([ + this.mcp + .search({ + query: options.containerTag, + factory: FACTORY_FACT, + field: "instance_id", + limit: POOL_LIMIT, + include_storage: false, // keep response lean + }) + .catch((e) => { + logger.warn(`scoped lme_fact search failed: ${e}`) + return { items: [] as SandraEntityHit[] } + }), + this.mcp + .search({ + query: options.containerTag, + factory: FACTORY_ENTITY, + field: "instance_id", + limit: POOL_LIMIT, + include_storage: false, + }) + .catch((e) => { + logger.warn(`scoped lme_entity search failed: ${e}`) + return { items: [] as SandraEntityHit[] } + }), + // Scoped raw sessions: we need the transcripts (storage) but the + // payload size is the critical constraint — even ~60 sessions of + // 5-15KB each drops the HTTP keep-alive. Fetch just 20, then rely on + // semantic ranking (semRawHits) to surface the most-relevant raw + // sessions for this query. + this.mcp + .search({ + query: options.containerTag, + factory: FACTORY_SESSION_RAW, + field: "instance_id", + limit: 20, + include_storage: true, + }) + .catch((e) => { + logger.warn(`scoped lme_session_raw search failed: ${e}`) + return { items: [] as SandraEntityHit[] } + }), + this.mcp + .semanticSearch({ + query, + factory: FACTORY_FACT, + limit: 200, + threshold: options.threshold ?? 0.0, + include_storage: true, // semantic hits already bring storage + }) + .catch((e) => { + logger.warn(`semantic_search lme_fact failed: ${e}`) + return { results: [] as SandraEntityHit[] } + }), + this.mcp + .semanticSearch({ + query, + factory: FACTORY_ENTITY, + limit: 60, + threshold: options.threshold ?? 0.0, + include_storage: true, + }) + .catch((e) => { + logger.warn(`semantic_search lme_entity failed: ${e}`) + return { results: [] as SandraEntityHit[] } + }), + this.mcp + .semanticSearch({ + query, + factory: FACTORY_SESSION_RAW, + limit: 12, + threshold: options.threshold ?? 
0.0, + include_storage: true, + }) + .catch((e) => { + logger.warn(`semantic_search lme_session_raw failed: ${e}`) + return { results: [] as SandraEntityHit[] } + }), + ]) + + const poolById = new Map() + // Seed pool from the instance-scoped searches (guaranteed instance_id). + for (const r of [factPool, entityPool, rawPool]) { + for (const h of normalizeHits(r)) { + if (typeof h.id !== "number") continue + poolById.set(h.id, h) + } + } + // Merge semantic hits into the pool only when they match instance_id. + // These bring storage (the `statement` field) that the scoped search + // did not fetch. For items already in the pool, upgrade their entry + // with the richer semantic hit (keeps the storage). + for (const r of [semHits, semEntityHits, semRawHits]) { + for (const h of normalizeHits(r)) { + if (typeof h.id !== "number") continue + if (h.refs?.instance_id !== options.containerTag) continue + poolById.set(h.id, h) + } + } + + // Build semantic rank: earlier in the sem list → higher rank. + const semRank = new Map() + let rankCounter = 0 + for (const r of [semRawHits, semHits, semEntityHits]) { + for (const h of normalizeHits(r)) { + if (typeof h.id !== "number") continue + if (!poolById.has(h.id)) continue + if (!semRank.has(h.id)) semRank.set(h.id, rankCounter++) + } + } + + // Split the limit budget: guarantee a fixed slice for raw-session + // transcripts (fallback coverage) and a fixed slice for facts+entities + // (structured extraction). The raw budget is small — a couple of + // transcripts are enough to let the answerer cross-check details the + // extractor missed — but it MUST be non-zero or the whole mechanism + // is invisible. 
+ const RAW_BUDGET = Math.min(8, Math.floor(limit * 0.15) + 2) + + const rawHits: SandraEntityHit[] = [] + const structuredHits: SandraEntityHit[] = [] + for (const h of poolById.values()) { + if (h.factory === FACTORY_SESSION_RAW) rawHits.push(h) + else structuredHits.push(h) + } + const semSort = (a: SandraEntityHit, b: SandraEntityHit) => { + const ra = semRank.has(a.id!) ? semRank.get(a.id!)! : Infinity + const rb = semRank.has(b.id!) ? semRank.get(b.id!)! : Infinity + if (ra !== rb) return ra - rb + const af = a.refs?.predicate ? 0 : 1 + const bf = b.refs?.predicate ? 0 : 1 + return af - bf + } + rawHits.sort(semSort) + structuredHits.sort(semSort) + + // Only keep raw hits that actually carry storage (the transcript). + // Raw pool entries without storage are useless to the answerer. + const rawWithStorage = rawHits.filter((h) => h.storage && h.storage.length > 0) + const rawSlice = rawWithStorage.slice(0, RAW_BUDGET) + const structuredSlice = structuredHits.slice(0, limit - rawSlice.length) + + // Interleave so raw transcripts show up near the top of the prompt — + // the answerer is more likely to cite them when they're not buried. + const ranked = [...rawSlice, ...structuredSlice] + return ranked.map((h) => hitToSearchResult(h)) + } + + async clear(containerTag: string): Promise { + // Sandra's MCP surface does not currently expose a bulk delete-by-ref. + // Benchmark runs isolate via a dedicated database (SANDRA_DB=benchmark_mb). + // We log a warning to make this explicit when the runner calls clear(). + logger.warn( + `Sandra clear() is a no-op; per-containerTag deletion not exposed. Use a dedicated DB for isolation. 
(containerTag=${containerTag})` + ) + } +} + +function buildBatchPayload( + instanceId: string, + sessionId: string, + sessionTimestamp: string, + entities: ExtractedEntity[], + facts: ExtractedFact[], + rawTranscript: string +) { + const sessionEntity = { + factory: FACTORY_SESSION, + refs: { + session_id: sessionId, + instance_id: instanceId, + timestamp: sessionTimestamp, + }, + } + + const sessionRawEntity = { + factory: FACTORY_SESSION_RAW, + refs: { + session_id: sessionId, + instance_id: instanceId, + timestamp: sessionTimestamp, + }, + storage: rawTranscript, + } + + const entityDefs = entities.map((e) => ({ + factory: FACTORY_ENTITY, + refs: { + name: e.name, + kind: e.kind || "other", + instance_id: instanceId, + session_id: sessionId, + turn_idx: "0", + notes: (e.notes || "").slice(0, 240), + }, + })) + + const factDefs = facts.map((f) => { + const refs: Record = { + predicate: f.predicate, + subject: f.subject || "user", + object: f.object || "", + value: (f.value || "").slice(0, 240), + event_date: f.event_date || "", + source: f.source || "user", + instance_id: instanceId, + session_id: sessionId, + turn_idx: String(f.turn_idx ?? 
0), + session_timestamp: sessionTimestamp, + } + if (f.typed_refs) { + for (const [k, v] of Object.entries(f.typed_refs)) { + refs[k] = String(v) + } + } + return { + factory: FACTORY_FACT, + refs, + storage: f.statement, + } + }) + + const nameToIdx = new Map() + entities.forEach((e, i) => { + if (!nameToIdx.has(e.name)) nameToIdx.set(e.name, i + 1) // +1 because session is at 0 + }) + const factStart = 1 + entityDefs.length + + const triplets: Array<{ + subject: string + verb: string + target: string + }> = [] + + for (let i = 0; i < entityDefs.length; i++) { + triplets.push({ subject: "$entity.0", verb: VERB_MENTIONS, target: `$entity.${i + 1}` }) + } + for (let j = 0; j < facts.length; j++) { + const factIdx = factStart + j + triplets.push({ subject: "$entity.0", verb: VERB_STATES, target: `$entity.${factIdx}` }) + const objName = facts[j].object + if (objName && nameToIdx.has(objName)) { + triplets.push({ + subject: `$entity.${factIdx}`, + verb: VERB_ABOUT, + target: `$entity.${nameToIdx.get(objName)!}`, + }) + } + } + + // sessionRawEntity is appended last; it does not participate in any + // triplet so it doesn't affect the $entity.N indices for facts above. + return { + concepts: [VERB_MENTIONS, VERB_STATES, VERB_ABOUT], + entities: [sessionEntity, ...entityDefs, ...factDefs, sessionRawEntity], + triplets, + } +} + +/** + * Build a plain-text transcript of a UnifiedSession for storage in the + * `lme_session_raw` factory. Format: one line per turn, prefixed with a + * short role tag. A header captures the timestamp so the answerer can + * resolve relative dates without extra lookups. + */ +function renderSessionTranscript(session: UnifiedSession, sessionTimestamp: string): string { + const lines: string[] = [] + if (sessionTimestamp) lines.push(`[session on ${sessionTimestamp}]`) + lines.push(`[session_id ${session.sessionId}]`) + for (let i = 0; i < session.messages.length; i++) { + const m = session.messages[i] + const tag = m.role === "user" ? 
"USER" : "ASSISTANT" + lines.push(`[turn ${i} ${tag}] ${m.content}`) + } + return lines.join("\n") +} + +function normalizeHits( + resp: unknown +): SandraEntityHit[] { + if (Array.isArray(resp)) return resp as SandraEntityHit[] + if (resp && typeof resp === "object") { + // Sandra's tools use a few different envelope shapes: + // sandra_semantic_search → {results: [...]} + // sandra_search → {items: [...]} + // sandra_list_entities → {entities: [...]} + // We accept all three so callers don't need to care. + const r = resp as Record + if (Array.isArray(r.results)) return r.results as SandraEntityHit[] + if (Array.isArray(r.items)) return r.items as SandraEntityHit[] + if (Array.isArray(r.entities)) return r.entities as SandraEntityHit[] + } + return [] +} + +function hitToSearchResult(h: SandraEntityHit): SandraSearchHit { + const refs = h.refs || {} + const isEntity = h.factory === FACTORY_ENTITY + const isRaw = h.factory === FACTORY_SESSION_RAW + let statement: string + let predicate: string | undefined + if (isRaw) { + // Session raw: storage holds the full transcript. Cap it so a single + // massive session doesn't crowd out other facts from the answer prompt. + const raw = h.storage || "" + statement = raw.length > 3000 ? raw.slice(0, 3000) + " […transcript truncated]" : raw + predicate = "raw_session_transcript" + } else if (isEntity) { + // Entities: synthesize a descriptor so the answer prompt treats entity + // and fact hits uniformly. + statement = [refs.name, refs.kind ? `(${refs.kind})` : "", refs.notes ? 
`: ${refs.notes}` : ""] + .filter(Boolean) + .join(" ") + .trim() + predicate = `entity:${refs.kind || "other"}` + } else { + statement = h.storage || refs.value || "" + predicate = refs.predicate + } + return { + predicate, + statement, + value: refs.value || refs.name, + event_date: refs.event_date, + session_timestamp: refs.session_timestamp || refs.timestamp, + source: refs.source, + turn_idx: refs.turn_idx, + session_id: refs.session_id, + similarity: h.similarity ?? h.score, + } +} + +/** + * Extract significant content words from a natural-language query, for use + * with sandra_search's LIKE-based matching. Strategy: strip punctuation, + * drop stopwords (question words, articles, common prepositions, pronouns, + * short tokens), cap at 3 to keep the parallel request count bounded. + * + * Returns an empty array if no keyword qualifies (rare) — in that case the + * caller just falls back to semantic search only. + */ +function extractKeywords(query: string): string[] { + const STOPWORDS = new Set([ + // question words + "what", "when", "where", "who", "whom", "why", "how", "which", "whose", + // auxiliaries / copulas + "is", "are", "was", "were", "be", "been", "being", "am", + "do", "does", "did", "have", "has", "had", "can", "could", "will", "would", + "should", "may", "might", "must", "shall", + // articles & determiners + "the", "a", "an", "this", "that", "these", "those", "some", "any", "every", + // prepositions + "in", "of", "to", "for", "on", "at", "by", "with", "from", "about", + "into", "over", "under", "between", "through", + // pronouns + "i", "me", "my", "mine", "you", "your", "yours", "he", "him", "his", + "she", "her", "hers", "it", "its", "we", "us", "our", "ours", + "they", "them", "their", "theirs", + // common verbs / fillers + "also", "just", "very", "really", "much", "many", "more", "most", + "like", "want", "need", "tell", "say", "said", + // conjunctions + "and", "or", "but", "so", "if", "then", "because", "than", + // other + "not", 
"no", "yes", "now", "here", "there", "ever", "never", + "please", "mine", "ours", + ]) + const tokens = query + .toLowerCase() + .replace(/[^a-z0-9\s]/g, " ") + .split(/\s+/) + .filter((w) => w.length >= 4 && !STOPWORDS.has(w)) + // Deduplicate while preserving order. + const seen = new Set() + const unique: string[] = [] + for (const w of tokens) { + if (!seen.has(w)) { + seen.add(w) + unique.push(w) + } + } + return unique.slice(0, 3) +} + +export default SandraProvider diff --git a/src/providers/sandra/mcp-client.ts b/src/providers/sandra/mcp-client.ts new file mode 100644 index 0000000..f091875 --- /dev/null +++ b/src/providers/sandra/mcp-client.ts @@ -0,0 +1,264 @@ +/** + * JSON-RPC 2.0 client for Sandra's MCP HTTP server. Ports the behavior of + * `sandra/benchmark/longmemeval/src/sandra_mcp_client.py` so both benchmarks + * share the exact same wire-level contract. + * + * Session lifecycle: first tool call lazily sends `initialize`, then the + * `notifications/initialized` fire-and-forget, carrying the `Mcp-Session-Id` + * returned by the server on subsequent requests. The server may respond in + * either `application/json` or `text/event-stream`; we extract the first + * `data:` line when streamed. 
+ */ + +import { logger } from "../../utils/logger" + +export interface SandraConfig { + url: string + token?: string + timeoutMs?: number +} + +export class SandraMCPError extends Error {} + +interface JsonRpcEnvelope { + jsonrpc: "2.0" + id?: string + method?: string + params?: unknown + result?: { content?: Array<{ type: string; text: string }> } & Record + error?: { code: number; message: string; data?: unknown } +} + +export class SandraMCPClient { + private readonly url: string + private readonly token: string | undefined + private readonly timeoutMs: number + private sessionId: string | undefined + private initialized = false + private initPromise: Promise | null = null + + constructor(config: SandraConfig) { + this.url = config.url + this.token = config.token + this.timeoutMs = config.timeoutMs ?? 600_000 + } + + private headers(): Record { + const h: Record = { + "Content-Type": "application/json", + Accept: "application/json, text/event-stream", + } + if (this.token) h["Authorization"] = `Bearer ${this.token}` + if (this.sessionId) h["Mcp-Session-Id"] = this.sessionId + return h + } + + private async post(body: unknown): Promise { + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), this.timeoutMs) + try { + return await fetch(this.url, { + method: "POST", + headers: this.headers(), + body: JSON.stringify(body), + signal: controller.signal, + }) + } finally { + clearTimeout(timer) + } + } + + private async ensureInitialized(): Promise { + if (this.initialized) return + if (!this.initPromise) { + this.initPromise = this.doInitialize() + } + await this.initPromise + } + + private async doInitialize(): Promise { + const initPayload = { + jsonrpc: "2.0" as const, + id: `init-${crypto.randomUUID().slice(0, 8)}`, + method: "initialize", + params: { + protocolVersion: "2025-11-25", + capabilities: {}, + clientInfo: { name: "memorybench-sandra", version: "0.1" }, + }, + } + const resp = await this.post(initPayload) + if 
(!resp.ok) { + const text = await resp.text() + throw new SandraMCPError( + `MCP initialize failed: HTTP ${resp.status}: ${text.slice(0, 300)}` + ) + } + const sid = resp.headers.get("Mcp-Session-Id") || resp.headers.get("mcp-session-id") + if (sid) this.sessionId = sid + // drain body so the connection is returned to the pool + await resp.text() + + const notify = { + jsonrpc: "2.0" as const, + method: "notifications/initialized", + params: {}, + } + // fire-and-forget + try { + await this.post(notify) + } catch (e) { + logger.debug(`notifications/initialized post failed: ${e}`) + } + this.initialized = true + } + + async callTool(name: string, args: Record): Promise { + await this.ensureInitialized() + + const payload = { + jsonrpc: "2.0" as const, + id: crypto.randomUUID(), + method: "tools/call", + params: { name, arguments: args }, + } + + // The built-in Bun HTTP server in Sandra occasionally drops concurrent + // keep-alive connections ("socket connection was closed unexpectedly"). + // One retry with exponential backoff handles the spurious drops without + // masking real problems. + const MAX_ATTEMPTS = 3 + let lastErr: unknown + let resp: Response | null = null + for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) { + try { + resp = await this.post(payload) + break + } catch (e) { + lastErr = e + if (attempt === MAX_ATTEMPTS) throw e + const msg = e instanceof Error ? e.message : String(e) + logger.debug( + `MCP ${name} attempt ${attempt}/${MAX_ATTEMPTS} failed: ${msg}. 
Retrying...` + ) + await new Promise((r) => setTimeout(r, 250 * attempt)) + } + } + if (!resp) throw lastErr + + if (!resp.ok) { + const text = await resp.text() + throw new SandraMCPError(`HTTP ${resp.status} from MCP: ${text.slice(0, 500)}`) + } + const raw = (await resp.text()).trim() + + let body = raw + if (body.startsWith("event:") || body.startsWith("data:")) { + for (const line of body.split("\n")) { + if (line.startsWith("data:")) { + body = line.slice(5).trim() + break + } + } + } + + let data: JsonRpcEnvelope + try { + data = JSON.parse(body) as JsonRpcEnvelope + } catch (e) { + throw new SandraMCPError(`Non-JSON body from MCP: ${body.slice(0, 300)}`) + } + if (data.error) { + throw new SandraMCPError( + `MCP error ${data.error.code}: ${data.error.message}` + ) + } + const result = data.result ?? {} + const content = result.content + if (Array.isArray(content) && content.length > 0 && content[0]?.type === "text") { + const text = content[0].text + try { + return JSON.parse(text) as T + } catch { + return text as unknown as T + } + } + return result as unknown as T + } + + // Typed convenience wrappers ------------------------------------------------ + + batch(payload: { + concepts?: string[] + entities?: Array<{ factory: string; refs: Record; storage?: string }> + triplets?: Array<{ + subject: string | number + verb: string | number + target: string | number + refs?: Record + }> + }): Promise { + return this.callTool("sandra_batch", { + concepts: payload.concepts ?? [], + entities: payload.entities ?? [], + triplets: payload.triplets ?? 
[], + }) + } + + semanticSearch(args: { + query: string + factory?: string + limit?: number + threshold?: number + include_storage?: boolean + fields?: string[] + }): Promise<{ results?: SandraEntityHit[] } | SandraEntityHit[]> { + return this.callTool("sandra_semantic_search", args as Record) + } + + search(args: { + query?: string + factory?: string + field?: string + limit?: number + fields?: string[] + include_storage?: boolean + }): Promise<{ results?: SandraEntityHit[] } | SandraEntityHit[]> { + return this.callTool("sandra_search", args as Record) + } + + embedAll(args: { factory?: string; limit?: number }): Promise<{ embedded?: number }> { + return this.callTool("sandra_embed_all", args as Record) + } + + listEntities(args: { + factory: string + limit?: number + offset?: number + fields?: string[] + include_storage?: boolean + }): Promise<{ entities?: SandraEntityHit[] } | SandraEntityHit[]> { + return this.callTool("sandra_list_entities", args as Record) + } +} + +export interface SandraBatchResult { + summary?: { + entitiesCreated?: number + tripletsCreated?: number + conceptsCreated?: number + refsAttached?: number + } + entities?: Array<{ id?: number; factory?: string }> + [key: string]: unknown +} + +export interface SandraEntityHit { + id?: number + factory?: string + refs?: Record + storage?: string + similarity?: number + score?: number + [key: string]: unknown +} diff --git a/src/providers/sandra/prompts.ts b/src/providers/sandra/prompts.ts new file mode 100644 index 0000000..07e4cf5 --- /dev/null +++ b/src/providers/sandra/prompts.ts @@ -0,0 +1,173 @@ +/** + * Prompts for Sandra: + * 1. EXTRACT_SYSTEM_PROMPT — used at ingestion to turn a raw session into + * structured {entities, facts}. Copied VERBATIM from the Python harness + * at `sandra/benchmark/longmemeval/src/ingest_claude.py:EXTRACT_SYSTEM_PROMPT` + * so the extraction quality is identical and the 76% baseline transfers. + * Do not edit without a matching edit on the Python side. 
+ * + * 2. answerPrompt — how memorybench's answering LLM reads Sandra's retrieval + * output. Mirrors the shape mem0 and zep provide so the downstream + * reader treats all three providers comparably. + */ + +import type { ProviderPrompts } from "../../types/prompts" + +export const EXTRACT_SYSTEM_PROMPT = `You extract a structured knowledge-graph from a single chat session so a downstream system can later answer questions about it. + +Output a single JSON object with this exact shape: + +{ + "entities": [ + {"name": "", "kind": "person|place|product|activity|topic|other", "notes": ""} + ], + "facts": [ + { + "predicate": "", + "statement": "", + "subject": "user|assistant", + "source": "user|assistant|synthesis", + "object": "", + "value": "", + "event_date": "", + "turn_idx": , + "typed_refs": {"": , ...} + } + ] +} + +Extraction rules: + +1. **User facts** (source="user"): anything the user asserts — events, preferences, purchases, corrections, emotional states, plans. One fact per atomic claim. Do not merge. + +2. **Assistant facts** (source="assistant"): when the assistant provides structured information that might be referenced later — numbered lists, ranked recommendations, direct answers to factual questions, enumerated items. Put the full list verbatim in \`value\` (enumerated items separated by \\n so "1. Foo\\n2. Bar\\n..." is preserved). Use predicates like "assistant_listed_" or "assistant_recommended_". + +3. **Preference synthesis** (source="synthesis"): if the user reveals preference patterns across multiple turns (likes, dislikes, habits, constraints, goals), emit ONE fact at the end with predicate="preference_profile" whose \`value\` describes the user's preferences as a short rubric (e.g. "prefers healthy high-protein meals with quinoa and roasted vegetables, open to new twists on chicken salads and wraps, avoids high-calorie options"). This is how you answer "suggest me something" style questions. + +4. 
**Temporal absolutes**: ALWAYS resolve relative dates to absolute YYYY-MM-DD using the session timestamp as anchor. "Yesterday" when session is 2023/05/21 (Sun) → "2023-05-20". "Last Thursday" when session is 2023/05/27 (Sat) → "2023-05-25". If a date range is given ("March 9th" this year), use the session year. If truly unknown, leave empty — do NOT leave the relative phrase. + +5. **Knowledge updates**: if the user corrects a previous statement, emit BOTH the old and the new fact — each with its own turn_idx. Downstream picks the most recent. + +6. **Consistency**: use the same entity name consistently. Cite entities by name from the entities list above. + +7. **No hallucination**. If unsure, skip it. Better to miss a fact than invent one. + +8. **Output valid JSON only**. No prose, no markdown code fences. + +9. **Quantified events — ONE FACT PER EVENT, WITH A TYPED NUMERIC REF** (narrow rule, applies ONLY when the source explicitly states a count or measurement about an action): + + The \`value\` string stays as a human-readable label (e.g. "$40 lights", "3 rides", "347 miles YTD"). But you ALSO emit a **\`typed_refs\`** dict where the KEY uses the \`dimension[unit]\` convention and the VALUE is a clean number. This is the authoritative numeric channel that Sandra sums at read time WITHOUT mixing units. + + Convention: snake_case dimension + bracketed unit. 
Use ONLY these keys (case matters — all lowercase): + - \`cost[usd]\`, \`cost[eur]\`, \`cost[gbp]\` — money amounts + - \`distance[miles]\`, \`distance[km]\`, \`distance[m]\` — distances + - \`duration[minutes]\`, \`duration[hours]\`, \`duration[days]\` — durations + - \`count[times]\` — pure integer event counts (rides, attendances, festivals) + - \`mass[kg]\`, \`mass[lbs]\` — weights + - \`temperature[c]\`, \`temperature[f]\` — temperatures + + Examples: + - "I bought a Bell Zephyr helmet for $120" → \`{"predicate": "bought_helmet", "value": "$120 Bell Zephyr", "typed_refs": {"cost[usd]": 120}}\` + - "I rode the Mako, Kraken, and Manta rollercoasters at SeaWorld" → \`{"predicate": "rode_rollercoasters_at_seaworld", "value": "3 rides: Mako, Kraken, Manta", "typed_refs": {"count[times]": 3}}\` + - "I rode Space Mountain three times at Disneyland" → \`{"predicate": "rode_rollercoaster_at_disneyland", "value": "3 rides on Space Mountain", "typed_refs": {"count[times]": 3}}\` + - "I attended the Portland Film Festival" → \`{"predicate": "attended_film_festival_portland", "value": "Portland Film Festival", "typed_refs": {"count[times]": 1}}\` + - "I've clocked 347 miles since the start of the year" → \`{"predicate": "tracked_bike_mileage", "value": "347 miles YTD", "typed_refs": {"distance[miles]": 347}}\` + - "I ran a 5K in 35 minutes" → \`{"predicate": "ran_5k", "value": "35 min finish", "typed_refs": {"duration[minutes]": 35, "distance[km]": 5}}\` + - "My goal is 1000 miles by summer" → \`{"predicate": "goal_bike_mileage", "value": "1000 miles by summer", "typed_refs": {}}\` # GOAL, not actual — do NOT add typed_ref (would be summed wrongly) + - "I'm due to lubricate my chain on May 15th" → \`{"predicate": "due_lubricate_chain", "value": "May 15", "event_date": "2023-05-15", "typed_refs": {}}\` # date-only + - "I prefer quinoa over rice" → \`{"predicate": "prefers_quinoa", "value": "quinoa over rice", "typed_refs": {}}\` # qualitative + + Rules: + - DO NOT 
/**
 * Build the answer-generation prompt from Sandra's retrieval output.
 *
 * @param question    the benchmark question being answered
 * @param context     retrieved facts (treated as SandraSearchHit[])
 * @param questionDate optional "asked on" date, injected into the prompt
 * @returns a single prompt string listing the (re-ordered) facts and the question
 *
 * NOTE(review): the prompt template below contains two empty inline-code
 * spans ("\`\`") where a placeholder such as <predicate> / <raw_session>
 * appears to have been lost in transit — confirm against the original
 * template before shipping.
 */
export function buildSandraAnswerPrompt(
  question: string,
  context: unknown[],
  questionDate?: string
): string {
  const hits = context as SandraSearchHit[]
  // Sort hits so facts with explicit event_date come first (useful for
  // temporal questions), then by session_timestamp descending (most recent
  // first, useful for knowledge-update questions where the latest fact wins).
  // This is a stable reorder — retrieval still decides what's in the list.
  const sorted = [...hits].sort((a, b) => {
    const aHasDate = a.event_date ? 1 : 0
    const bHasDate = b.event_date ? 1 : 0
    if (aHasDate !== bHasDate) return bHasDate - aHasDate
    // Timestamps compared lexicographically — assumes sortable (ISO-like)
    // session_timestamp strings; TODO confirm upstream format.
    const aTs = a.session_timestamp || ""
    const bTs = b.session_timestamp || ""
    return bTs.localeCompare(aTs)
  })
  // Render each hit as "[N][event: ...][session: ...](source)<predicate> text",
  // omitting any tag whose field is absent. Falls back to the raw JSON of the
  // hit when neither statement nor value is present.
  const rendered = sorted
    .map((h, i) => {
      const dateTag = h.event_date ? ` [event: ${h.event_date}]` : ""
      const sessTag = h.session_timestamp ? ` [session: ${h.session_timestamp}]` : ""
      const srcTag = h.source ? ` (${h.source})` : ""
      const predTag = h.predicate ? ` <${h.predicate}>` : ""
      const head = h.statement || h.value || JSON.stringify(h)
      return `[${i + 1}]${dateTag}${sessTag}${srcTag}${predTag} ${head}`
    })
    .join("\n\n")

  const dateLine = questionDate ? `\nQuestion asked on: ${questionDate}\n` : ""
  // Prompt style mirrors mem0's buildMem0AnswerPrompt so Sandra is evaluated
  // under the same answer-generation constraints. The only adaptations are
  // (a) telling the model the entries are structured facts with timestamps
  // (Sandra's retrieval shape), and (b) asking it to synthesize across facts
  // when the question calls for a recommendation or preference answer.
  return `You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.

Key instructions:
- Analyze the facts with their timestamps carefully.
- **For knowledge-update questions** (e.g. "current X", "latest Y"): prioritize the most recent fact (facts are sorted most-recent first). Earlier contradicting facts are stale.
- **For enumeration / counting questions** (e.g. "how many", "list all"): count distinct events across ALL retrieved facts. Do not stop after finding one.
- **For temporal comparison** (e.g. "which first, X or Y"): compare event_date or session_timestamp on both items. Answer with the one whose date is earlier.
- **For preference / suggestion questions**: synthesize a recommendation from the user's prior behavior shown in the facts.
- Convert relative time references ("last year", "two months ago") into absolute dates using the session timestamps.
- Look for direct evidence in the facts; do not invent.
- Don't confuse character names with actual users.

Fact entry format:
- \`[N]\` index of the fact
- \`[event: YYYY-MM-DD]\` when the event happened, if known
- \`[session: ... timestamp]\` when this fact was recorded in conversation
- \`(user|assistant|synthesis)\` source of the fact
- \`\` short semantic predicate
- Then the fact statement

Special entries: facts with predicate \`\` are
**verbatim conversation dumps** from a past session. They contain details
that may not appear as structured facts elsewhere in the list. When
structured facts are silent on a question, read the raw transcripts
carefully — the answer often lives there as literal quoted text.
${dateLine}
Facts:
${rendered}

Question: ${question}

Answer concisely and directly.`
}
+export const FACTORY_SESSION_RAW = "lme_session_raw" + +export const VERB_MENTIONS = "lme_mentions" +export const VERB_STATES = "lme_states" +export const VERB_ABOUT = "lme_about" + +export const SESSION_FIELDS = ["session_id", "instance_id", "timestamp"] as const + +export interface ExtractedEntity { + name: string + kind: string + notes?: string +} + +export interface ExtractedFact { + predicate: string + statement: string + subject?: string + source?: "user" | "assistant" | "synthesis" + object?: string + value?: string + event_date?: string + turn_idx?: number + typed_refs?: Record +} + +export interface SessionExtraction { + entities: ExtractedEntity[] + facts: ExtractedFact[] +} diff --git a/src/types/provider.ts b/src/types/provider.ts index cdc0228..e2ded5d 100644 --- a/src/types/provider.ts +++ b/src/types/provider.ts @@ -47,4 +47,4 @@ export interface Provider { clear(containerTag: string): Promise } -export type ProviderName = "supermemory" | "mem0" | "zep" | "filesystem" | "rag" +export type ProviderName = "supermemory" | "mem0" | "zep" | "filesystem" | "rag" | "sandra" diff --git a/src/utils/config.ts b/src/utils/config.ts index 8ac1268..fd902c1 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -6,6 +6,8 @@ export interface Config { openaiApiKey: string anthropicApiKey: string googleApiKey: string + sandraUrl: string + sandraToken: string } export const config: Config = { @@ -16,9 +18,15 @@ export const config: Config = { openaiApiKey: process.env.OPENAI_API_KEY || "", anthropicApiKey: process.env.ANTHROPIC_API_KEY || "", googleApiKey: process.env.GOOGLE_API_KEY || "", + sandraUrl: process.env.SANDRA_URL || "http://localhost:8090/mcp", + sandraToken: process.env.SANDRA_TOKEN || "", } -export function getProviderConfig(provider: string): { apiKey: string; baseUrl?: string } { +export function getProviderConfig(provider: string): { + apiKey: string + baseUrl?: string + token?: string +} { switch (provider) { case "supermemory": return { 
apiKey: config.supermemoryApiKey, baseUrl: config.supermemoryBaseUrl } @@ -30,6 +38,14 @@ export function getProviderConfig(provider: string): { apiKey: string; baseUrl?: return { apiKey: config.openaiApiKey } // Filesystem uses OpenAI for memory extraction case "rag": return { apiKey: config.openaiApiKey } // RAG provider uses OpenAI for embeddings + case "sandra": + // Sandra's API key slot carries ANTHROPIC_API_KEY (used by the + // ingestion extractor). baseUrl + token address the MCP HTTP server. + return { + apiKey: config.anthropicApiKey, + baseUrl: config.sandraUrl, + token: config.sandraToken, + } default: throw new Error(`Unknown provider: ${provider}`) }