Skip to content

Commit bc83720

Browse files
committed
fix(socket): clear stuck "Reconnecting" indicator after invite accept
Accepting an invite switches the active org and immediately redirects into the workspace, so the socket bootstraps under a just-rotated session. A transient token-mint failure during that window was latched into a permanent failed state that only a page reload could clear.

- Transient (non-401) token failures now fail the handshake fast and retry with a fresh token instead of hanging for the full connect timeout; the failure mode is tracked in a ref so that connect_error keeps reconnecting rather than latching authFailed.
- The connect handler now clears isReconnecting, so a healthy socket can never sit showing "Reconnecting...".
- authFailed now auto-recovers with exponential backoff (previously nothing ever called retryConnection), so a transient 401 mid-rotation no longer needs a reload.
1 parent 05408fd commit bc83720

1 file changed

Lines changed: 57 additions & 8 deletions

File tree

apps/sim/app/workspace/providers/socket-provider.tsx

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import {
1111
useState,
1212
} from 'react'
1313
import { createLogger } from '@sim/logger'
14+
import { getErrorMessage } from '@sim/utils/errors'
1415
import { generateId } from '@sim/utils/id'
16+
import { backoffWithJitter } from '@sim/utils/retry'
1517
import { useParams } from 'next/navigation'
1618
import type { Socket } from 'socket.io-client'
1719
import { getSocketUrl } from '@/lib/core/utils/urls'
@@ -162,6 +164,16 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
162164
const explicitWorkflowIdRef = useRef<string | null>(explicitWorkflowId)
163165
const joinControllerRef = useRef(new SocketJoinController())
164166
const joinRetryTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
167+
/**
168+
* Why the most recent socket-token mint failed. A `transient` failure (rate
169+
* limit, 5xx, network) passes a null token like a true auth failure does, but
170+
* must keep Socket.IO reconnecting rather than latch `authFailed` — the server
171+
* rejects both with the same "Authentication required" message, so the client
172+
* can only tell them apart by remembering why the token was null.
173+
*/
174+
const tokenFailureModeRef = useRef<'none' | 'auth' | 'transient'>('none')
175+
const authRetryTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
176+
const authRetryAttemptRef = useRef(0)
165177

166178
const params = useParams()
167179
const urlWorkflowId = params?.workflowId as string | undefined
@@ -361,21 +373,29 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
361373
auth: async (cb) => {
362374
try {
363375
const freshToken = await generateSocketToken()
376+
tokenFailureModeRef.current = 'none'
364377
cb({ token: freshToken })
365378
} catch (error) {
366-
logger.error('Failed to generate fresh token for connection:', error)
367379
if (error instanceof Error && error.message === 'Authentication required') {
368-
// True auth failure - pass null token, server will reject with "Authentication required"
369-
cb({ token: null })
380+
tokenFailureModeRef.current = 'auth'
381+
logger.error('Failed to generate fresh token for connection:', error)
382+
} else {
383+
tokenFailureModeRef.current = 'transient'
384+
logger.warn('Transient socket token failure, will retry connection', {
385+
error: getErrorMessage(error),
386+
})
370387
}
371-
// For server errors, don't call cb - connection will timeout and Socket.IO will retry
388+
cb({ token: null })
372389
}
373390
},
374391
})
375392

376393
socketInstance.on('connect', () => {
377394
setIsConnected(true)
378395
setIsConnecting(false)
396+
setIsReconnecting(false)
397+
tokenFailureModeRef.current = 'none'
398+
authRetryAttemptRef.current = 0
379399
setCurrentSocketId(socketInstance.id ?? null)
380400
logger.info('Socket connected successfully', {
381401
socketId: socketInstance.id,
@@ -406,11 +426,11 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
406426
setIsConnecting(false)
407427
logger.error('Socket connection error:', { message: error.message })
408428

409-
// Check if this is an authentication failure
410429
const isAuthError =
411-
error.message?.includes('Token validation failed') ||
412-
error.message?.includes('Authentication failed') ||
413-
error.message?.includes('Authentication required')
430+
tokenFailureModeRef.current !== 'transient' &&
431+
(error.message?.includes('Token validation failed') ||
432+
error.message?.includes('Authentication failed') ||
433+
error.message?.includes('Authentication required'))
414434

415435
if (isAuthError) {
416436
logger.warn(
@@ -737,6 +757,35 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
737757
}
738758
}, [user?.id, authFailed])
739759

760+
/**
761+
* Auto-recover from an auth failure. The token mint can 401 transiently while a
762+
* session is mid-rotation (e.g. right after switching active organization on
763+
* invite accept); without this the socket stays dead until a manual page reload,
764+
* since no other caller invokes {@link retryConnection}. Retries with backoff so a
765+
* genuine logged-out session re-mints lazily rather than hammering the endpoint.
766+
*/
767+
useEffect(() => {
768+
if (!authFailed) {
769+
return
770+
}
771+
772+
const attempt = authRetryAttemptRef.current
773+
const delay = backoffWithJitter(attempt + 1, null, { baseMs: 1000, maxMs: 30000 })
774+
authRetryTimeoutRef.current = setTimeout(() => {
775+
authRetryTimeoutRef.current = null
776+
authRetryAttemptRef.current = attempt + 1
777+
logger.info('Auto-retrying socket connection after auth failure', { attempt })
778+
setAuthFailed(false)
779+
}, delay)
780+
781+
return () => {
782+
if (authRetryTimeoutRef.current !== null) {
783+
clearTimeout(authRetryTimeoutRef.current)
784+
authRetryTimeoutRef.current = null
785+
}
786+
}
787+
}, [authFailed])
788+
740789
const hydrationPhase = useWorkflowRegistryStore((s) => s.hydration.phase)
741790

742791
useEffect(() => {

0 commit comments

Comments
 (0)