11import type { Span } from '@opentelemetry/api'
2+ import { db } from '@sim/db'
3+ import { workspace } from '@sim/db/schema'
24import { createLogger } from '@sim/logger'
3- import { toError } from '@sim/utils/errors'
5+ import { getPostgresConstraintName , getPostgresErrorCode , toError } from '@sim/utils/errors'
6+ import { eq } from 'drizzle-orm'
47import { type NextRequest , NextResponse } from 'next/server'
58import { billingUpdateCostContract } from '@/lib/api/contracts/subscription'
69import { parseRequest } from '@/lib/api/server'
@@ -17,6 +20,35 @@ import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
1720
1821const logger = createLogger ( 'BillingUpdateCostAPI' )
1922
/**
 * Maps the workspace ID supplied on the request to one that is safe to stamp
 * on the usage ledger, i.e. one that has a row in this deployment's
 * `workspace` table.
 *
 * Attribution is best-effort. Self-hosted and headless clients bill through
 * this endpoint using workspace IDs from their own databases, while
 * `usage_log.workspace_id` has an FK to `workspace`; writing a foreign ID
 * would abort the whole flush with an FK violation and leave real cost stuck
 * in the caller's dead-letter queue. An unrecognized workspace is therefore
 * recorded unattributed — billing is keyed on the user's billing entity and
 * never depends on the workspace.
 *
 * @param requestId - Request identifier used to prefix the warning log line.
 * @param workspaceId - Workspace ID taken from the request, if any.
 * @returns The workspace ID when a matching row exists; otherwise `undefined`.
 */
async function resolveAttributableWorkspaceId(
  requestId: string,
  workspaceId: string | undefined
): Promise<string | undefined> {
  // Nothing to attribute: skip the lookup entirely.
  if (!workspaceId) {
    return undefined
  }

  const matches = await db
    .select({ id: workspace.id })
    .from(workspace)
    .where(eq(workspace.id, workspaceId))
    .limit(1)
  const known = matches[0]

  // Miss: the ID belongs to some other deployment. Log it and fall back to an
  // unattributed record rather than letting the ledger insert hit the FK.
  if (!known) {
    logger.warn(`[${requestId}] Workspace not found in this deployment; recording unattributed`, {
      workspaceId,
    })
    return undefined
  }

  return known.id
}
51+
2052/**
2153 * POST /api/billing/update-cost
2254 * Update user cost with a pre-calculated cost value (internal API key auth required)
@@ -129,6 +161,8 @@ async function updateCostInner(req: NextRequest, span: Span): Promise<NextRespon
129161 source,
130162 } )
131163
164+ const attributedWorkspaceId = await resolveAttributableWorkspaceId ( requestId , workspaceId )
165+
132166 // Go sends the request's CUMULATIVE cost, possibly more than once (a
133167 // mid-loop provider-error flush, then the recovered terminal flush, plus
134168 // abort-race duplicates). Record it as a monotonic top-up: one ledger row
@@ -141,7 +175,7 @@ async function updateCostInner(req: NextRequest, span: Span): Promise<NextRespon
141175 if ( idempotencyKey ) {
142176 const result = await recordCumulativeUsage ( {
143177 userId,
144- workspaceId,
178+ workspaceId : attributedWorkspaceId ,
145179 source,
146180 model,
147181 cost,
@@ -160,7 +194,7 @@ async function updateCostInner(req: NextRequest, span: Span): Promise<NextRespon
160194 } else {
161195 await recordUsage ( {
162196 userId,
163- workspaceId,
197+ workspaceId : attributedWorkspaceId ,
164198 entries : [
165199 {
166200 category : 'model' ,
@@ -229,8 +263,16 @@ async function updateCostInner(req: NextRequest, span: Span): Promise<NextRespon
229263 } catch ( error ) {
230264 const duration = Date . now ( ) - startTime
231265
266+ // Surface the underlying Postgres failure (e.g. 23503 FK violation vs a
267+ // lock timeout) — Drizzle's "Failed query" wrapper alone cannot
268+ // distinguish them, which made the dead-workspace incident undiagnosable
269+ // from logs.
270+ const pgCode = getPostgresErrorCode ( error )
271+ const pgConstraint = getPostgresConstraintName ( error )
232272 logger . error ( `[${ requestId } ] Cost update failed` , {
233273 error : toError ( error ) . message ,
274+ ...( pgCode && { pgCode } ) ,
275+ ...( pgConstraint && { pgConstraint } ) ,
234276 stack : error instanceof Error ? error . stack : undefined ,
235277 duration,
236278 } )
0 commit comments