diff --git a/duckdbservice/duckdb_pair.go b/duckdbservice/duckdb_pair.go index 71a191ed..25a96a63 100644 --- a/duckdbservice/duckdb_pair.go +++ b/duckdbservice/duckdb_pair.go @@ -88,6 +88,16 @@ func (*nonClosingConnector) Close() error { return nil } // path as openBaseDB (threads, memory_limit, extensions, profiling, cache // settings); the Control DB is intentionally minimal. func OpenDuckDBPair(cfg server.Config, username string) (*DuckDBPair, error) { + // This is the worker-process DB factory (standalone uses server.openBaseDB), + // so it's the single place to pin every worker's persistent-secret directory + // under DataDir. That makes the location deterministic and wipeable on + // recycle, and stops stale secrets in DuckDB's $HOME default from loading and + // colliding with the in-memory secrets re-created at activation. + // ConfigureMainDB (below) applies the setting. (We do NOT disable persistent + // secrets outright: that breaks the DuckLake ATTACH path, which needs the + // local_file secret storage registered.) + cfg.PinSecretDirectory = true + dsn, err := server.DuckDBDSN(cfg, username) if err != nil { return nil, err diff --git a/duckdbservice/secret_recycle_test.go b/duckdbservice/secret_recycle_test.go new file mode 100644 index 00000000..1332dc02 --- /dev/null +++ b/duckdbservice/secret_recycle_test.go @@ -0,0 +1,125 @@ +package duckdbservice + +import ( + "database/sql" + "os" + "path/filepath" + "testing" + + _ "github.com/duckdb/duckdb-go/v2" + + "github.com/posthog/duckgres/server" +) + +// TestWipePersistedSecretsOnRecycle is the unit-level guard for the +// worker-recycle behavior that Warmup relies on: a CREATE PERSISTENT SECRET +// left on disk by a prior worker incarnation must not survive into the next +// one. We do it here rather than in tests/k8s because the meaningful boundary +// is a *process restart over a surviving DataDir* (container restart within a +// pod, or a persistent volume / warm node). A tests/k8s pod-kill would be +// vacuous: worker DataDir is an EmptyDir, so pod replacement already yields a +// fresh empty /data and the test would pass even with the wipe removed. +// +// The test is explicitly non-vacuous: the middle phase asserts the persistent +// secret *does* survive a plain reopen (the original bug), and only the final +// phase — after wipePersistedSecrets — asserts it is gone (the fix). +func TestWipePersistedSecretsOnRecycle(t *testing.T) { + cfg := server.Config{DataDir: t.TempDir()} + secretDir := server.SecretDirectory(cfg) + if secretDir == "" { + t.Fatal("SecretDirectory returned empty for a non-empty DataDir") + } + + const secretName = "test_recycle_secret" + + // open returns a DB pinned at the same secret_directory the worker uses. + // We pin it directly here rather than via ConfigureMainDB(PinSecretDirectory) + // because this test exercises the wipe mechanism in isolation; pinning the + // directory is all that's needed to land a persistent secret where the wipe + // will look for it. Caller closes the DB. + open := func() *sql.DB { + t.Helper() + db, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + if _, err := db.Exec("SET secret_directory = '" + secretDir + "'"); err != nil { + _ = db.Close() + t.Fatalf("set secret_directory: %v", err) + } + return db + } + + persistentSecretCount := func(db *sql.DB) int { + t.Helper() + var n int + if err := db.QueryRow( + "SELECT count(*) FROM duckdb_secrets() WHERE name = ? AND persistent", + secretName, + ).Scan(&n); err != nil { + t.Fatalf("query duckdb_secrets: %v", err) + } + return n + } + + // Phase 1: create a persistent secret. It should land on disk under the + // pinned secret_directory. + db := open() + if _, err := db.Exec( + "CREATE PERSISTENT SECRET " + secretName + " (TYPE s3, KEY_ID 'a', SECRET 'b')", + ); err != nil { + t.Fatalf("create persistent secret: %v", err) + } + if got := persistentSecretCount(db); got != 1 { + t.Fatalf("after create: persistent secret count = %d, want 1", got) + } + _ = db.Close() + + entries, err := os.ReadDir(secretDir) + if err != nil { + t.Fatalf("read secret_directory: %v", err) + } + if len(entries) == 0 { + t.Fatalf("secret_directory %s is empty; persistent secret was not written to disk", secretDir) + } + + // Also plant a file in the legacy default location (/.duckdb/ + // stored_secrets) — where a worker whose HOME is its DataDir accumulated + // secrets before we pinned secret_directory. The recycle must clear this too. + legacyDir := server.LegacySecretDirectory(cfg) + if err := os.MkdirAll(legacyDir, 0o750); err != nil { + t.Fatalf("mkdir legacy secret dir: %v", err) + } + legacyFile := filepath.Join(legacyDir, "old_secret.duckdb_secret") + if err := os.WriteFile(legacyFile, []byte("stale"), 0o600); err != nil { + t.Fatalf("write legacy secret file: %v", err) + } + + // Phase 2 (non-vacuous control): reopen WITHOUT recycling. The persistent + // secret must reload from disk — this is exactly the state that, combined + // with the in-memory secret re-created at activation, produces the + // "secret occurs in multiple storage backends" ambiguity in production. + db = open() + if got := persistentSecretCount(db); got != 1 { + t.Fatalf("after plain reopen: persistent secret count = %d, want 1 "+ + "(test is vacuous if the secret didn't survive a reopen)", got) + } + _ = db.Close() + + // Phase 3: recycle. This is the exact call Warmup makes on worker startup. + wipePersistedSecrets(cfg) + if _, err := os.Stat(secretDir); !os.IsNotExist(err) { + t.Fatalf("secret_directory still exists after wipe (stat err = %v)", err) + } + if _, err := os.Stat(legacyFile); !os.IsNotExist(err) { + t.Fatalf("legacy secret file still exists after wipe (stat err = %v)", err) + } + + // Phase 4: reopen after recycle. The persistent secret must be gone. + db = open() + if got := persistentSecretCount(db); got != 0 { + t.Fatalf("after recycle: persistent secret count = %d, want 0 "+ + "(wipePersistedSecrets did not prevent resurrection)", got) + } + _ = db.Close() +} diff --git a/duckdbservice/service.go b/duckdbservice/service.go index ba26a1d9..72edf26e 100644 --- a/duckdbservice/service.go +++ b/duckdbservice/service.go @@ -156,6 +156,40 @@ func NewDuckDBService(cfg ServiceConfig) *DuckDBService { } } +// wipePersistedSecrets removes DuckDB's persistent-secret directories for this +// config. Called on worker startup so persisted secrets never survive a +// recycle. Best-effort: a failure is logged, not fatal — a stale secret is a +// correctness annoyance, not a reason to refuse to start. No-op when no DataDir +// is configured. +// +// We wipe two locations: +// - the pinned secret_directory (server.SecretDirectory), where new secrets +// would land under the current config, and +// - the legacy DuckDB default /.duckdb/stored_secrets, which is where +// secrets accumulated before we pinned the directory (a worker whose +// HOME is its DataDir resolves DuckDB's default there). Without this, the +// historical files that caused the original ambiguity would linger on a +// persistent DataDir forever. +func wipePersistedSecrets(cfg server.Config) { + if cfg.DataDir == "" { + return + } + dirs := []string{ + server.SecretDirectory(cfg), + server.LegacySecretDirectory(cfg), + } + for _, dir := range dirs { + if dir == "" { + continue + } + if err := os.RemoveAll(dir); err != nil { + slog.Warn("Failed to wipe persisted secret directory on worker startup.", "secret_directory", dir, "error", err) + continue + } + slog.Info("Wiped persisted secret directory on worker startup.", "secret_directory", dir) + } +} + // Warmup performs one-time initialization of the shared DuckDB instance. // This loads extensions and attaches catalogs so that subsequent session // creations are nearly instantaneous. @@ -166,6 +200,21 @@ func (p *SessionPool) Warmup() error { return nil } + // Recycle hook: a worker process starting up is the boundary between one + // tenant runtime and the next on this disk (a serverless/shared-warm worker + // is single-org-bound for its whole life, so process start == recycle). + // Wipe any persisted secrets left on disk before DuckDB's SecretManager + // reads them, so a CREATE PERSISTENT SECRET from a prior incarnation can't + // resurface and collide with the in-memory secret re-created at activation, + // and can't leak across tenants. + // + // This matters whenever DataDir survives across worker processes: a + // container restart within the same pod (EmptyDir survives that — it's only + // destroyed on pod deletion), or any persistent-volume / warm-node setup. On + // a fresh pod with a fresh EmptyDir the directory is already empty and this + // is a no-op. + wipePersistedSecrets(p.cfg) + start := time.Now() slog.Info("Pre-warming worker DuckDB instance...") diff --git a/server/conn.go b/server/conn.go index 239d80df..cd7c6dc5 100644 --- a/server/conn.go +++ b/server/conn.go @@ -877,6 +877,17 @@ func isDuckDBUtilityCommand(query string) bool { "UNLOAD", "CREATE SECRET", "DROP SECRET", + // DuckDB's own disambiguation hint for a secret that exists in more + // than one storage backend is "DROP SECRET ...". + // Those variants don't start with "DROP SECRET" (the next token is + // PERSISTENT/TEMPORARY), so without these explicit prefixes they fall + // through to the PG transpiler and die with `syntax error at or near + // "PERSISTENT"` — i.e. the exact command DuckDB tells the user to run is + // unrunnable. Pass them straight through. (List order is irrelevant: + // matching is by string prefix, and "DROP SECRET" is not a prefix of + // "DROP PERSISTENT SECRET", so the two never collide.) + "DROP PERSISTENT SECRET", + "DROP TEMPORARY SECRET", "CREATE PERSISTENT SECRET", "CREATE TEMPORARY SECRET", "CREATE OR REPLACE SECRET", diff --git a/server/conn_test.go b/server/conn_test.go index ab5c0e9f..d8a8cf04 100644 --- a/server/conn_test.go +++ b/server/conn_test.go @@ -21,6 +21,43 @@ import ( "github.com/posthog/duckgres/server/wire" ) +func TestIsDuckDBUtilityCommand(t *testing.T) { + tests := []struct { + name string + query string + want bool + }{ + // Secret DDL must bypass the PG transpiler — DuckDB-only syntax. + {"create secret", `CREATE SECRET foo (TYPE s3)`, true}, + {"create or replace secret", `CREATE OR REPLACE SECRET foo (TYPE s3)`, true}, + {"drop secret", `DROP SECRET foo`, true}, + {"drop secret if exists", `DROP SECRET IF EXISTS foo`, true}, + // Regression: DuckDB's own ambiguity hint is "DROP + // SECRET ...". These don't start with "DROP SECRET", so before the fix + // they fell through to pg_query and failed with a syntax error on the + // PERSISTENT/TEMPORARY keyword. + {"drop persistent secret", `DROP PERSISTENT SECRET foo`, true}, + {"drop persistent secret if exists quoted", `DROP PERSISTENT SECRET IF EXISTS "portola_warehouse_prod_s3"`, true}, + {"drop temporary secret", `DROP TEMPORARY SECRET foo`, true}, + {"drop temporary secret if exists", `DROP TEMPORARY SECRET IF EXISTS foo`, true}, + {"case insensitive", `drop persistent secret foo`, true}, + {"leading comment", "-- cleanup\nDROP PERSISTENT SECRET foo", true}, + // Not utility commands — must still go through the transpiler. + {"select", `SELECT * FROM duckdb_secrets()`, false}, + {"drop table", `DROP TABLE foo`, false}, + // Guard the word-boundary check: a name that merely starts with the + // keyword text must not be mistaken for the command. + {"drop secretive table", `DROP TABLE secretive`, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isDuckDBUtilityCommand(tt.query); got != tt.want { + t.Errorf("isDuckDBUtilityCommand(%q) = %v, want %v", tt.query, got, tt.want) + } + }) + } +} + func TestIsEmptyQuery(t *testing.T) { tests := []struct { name string diff --git a/server/persistent_secrets_test.go b/server/persistent_secrets_test.go new file mode 100644 index 00000000..4cc190cb --- /dev/null +++ b/server/persistent_secrets_test.go @@ -0,0 +1,110 @@ +package server + +import ( + "database/sql" + "os" + "path/filepath" + "testing" + + _ "github.com/duckdb/duckdb-go/v2" +) + +// TestPinSecretDirectory is the regression guard for the worker-side fix: with +// Config.PinSecretDirectory set (as duckdbservice.OpenDuckDBPair does for every +// worker), ConfigureMainDB redirects DuckDB's persistent-secret storage to +// /secrets. That means: +// +// (a) a CREATE PERSISTENT SECRET lands under /secrets (deterministic, +// wipeable), and +// (b) a stale secret sitting in DuckDB's $HOME default is no longer loaded, so +// it can't collide with the in-memory secret re-created at activation. +// +// We deliberately do NOT disable persistent secrets (that breaks DuckLake's +// ATTACH, which needs the local_file secret storage registered), so persistent +// secrets still work — they're just relocated and wiped on recycle. +func TestPinSecretDirectory(t *testing.T) { + dir := t.TempDir() + secretDir := SecretDirectory(Config{DataDir: dir}) + + db, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + defer func() { _ = db.Close() }() + + cfg := Config{DataDir: dir, PinSecretDirectory: true} + if err := ConfigureMainDB(db, cfg, "worker"); err != nil { + t.Fatalf("ConfigureMainDB: %v", err) + } + + // (a) a persistent secret must still be creatable and must land under the + // pinned directory. + if _, err := db.Exec("CREATE PERSISTENT SECRET pinned (TYPE s3, KEY_ID 'a', SECRET 'b')"); err != nil { + t.Fatalf("create persistent secret: %v", err) + } + entries, err := os.ReadDir(secretDir) + if err != nil { + t.Fatalf("read pinned secret_directory %s: %v", secretDir, err) + } + if len(entries) == 0 { + t.Errorf("persistent secret did not land under pinned secret_directory %s", secretDir) + } +} + +// TestPinSecretDirectoryIgnoresLegacyDefault proves point (b): a secret written +// to DuckDB's old $HOME-style default location is not loaded once the directory +// is pinned elsewhere. +func TestPinSecretDirectoryIgnoresLegacyDefault(t *testing.T) { + dir := t.TempDir() + + // Seed a persistent secret into the *legacy* location. + legacy := LegacySecretDirectory(Config{DataDir: dir}) + seed, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true") + if err != nil { + t.Fatalf("open seed duckdb: %v", err) + } + if _, err := seed.Exec("SET secret_directory = '" + legacy + "'"); err != nil { + t.Fatalf("seed set secret_directory: %v", err) + } + if _, err := seed.Exec("CREATE PERSISTENT SECRET stale (TYPE s3, KEY_ID 'a', SECRET 'b')"); err != nil { + t.Fatalf("seed create persistent secret: %v", err) + } + _ = seed.Close() + + // A worker-style connection pins to /secrets, not the legacy path. + db, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + defer func() { _ = db.Close() }() + if err := ConfigureMainDB(db, Config{DataDir: dir, PinSecretDirectory: true}, "worker"); err != nil { + t.Fatalf("ConfigureMainDB: %v", err) + } + + var loaded int + if err := db.QueryRow("SELECT count(*) FROM duckdb_secrets() WHERE name = 'stale'").Scan(&loaded); err != nil { + t.Fatalf("query duckdb_secrets: %v", err) + } + if loaded != 0 { + t.Errorf("stale secret in the legacy location was loaded (count=%d); pinning should redirect away from it", loaded) + } +} + +// TestSecretDirectory documents the path derivation the wipe and pinning rely on. +func TestSecretDirectory(t *testing.T) { + if got := SecretDirectory(Config{}); got != "" { + t.Errorf("SecretDirectory with empty DataDir = %q, want \"\"", got) + } + want := filepath.Join("/data", "secrets") + if got := SecretDirectory(Config{DataDir: "/data"}); got != want { + t.Errorf("SecretDirectory(/data) = %q, want %q", got, want) + } + + if got := LegacySecretDirectory(Config{}); got != "" { + t.Errorf("LegacySecretDirectory with empty DataDir = %q, want \"\"", got) + } + wantLegacy := filepath.Join("/data", ".duckdb", "stored_secrets") + if got := LegacySecretDirectory(Config{DataDir: "/data"}); got != wantLegacy { + t.Errorf("LegacySecretDirectory(/data) = %q, want %q", got, wantLegacy) + } +} diff --git a/server/server.go b/server/server.go index 1c583da4..df95294b 100644 --- a/server/server.go +++ b/server/server.go @@ -266,6 +266,22 @@ type Config struct { // while TLS, authentication, and query execution happen in child processes. ProcessIsolation bool + // PinSecretDirectory pins DuckDB's persistent-secret directory under DataDir + // (/secrets) instead of DuckDB's $HOME default. Set for worker + // processes (see duckdbservice.OpenDuckDBPair): it makes the persisted-secret + // location deterministic and wipeable on recycle, and — by redirecting away + // from the $HOME default — stops stale secrets in the old location from being + // loaded and colliding with the in-memory secrets re-created at activation. + // Left false for standalone, so upgrading doesn't relocate an existing + // standalone user's persistent secrets. + // + // Note: we deliberately do NOT also set allow_persistent_secrets=false. + // Disabling persistent secrets unregisters DuckDB's local_file secret + // storage backend, which the DuckLake ATTACH path depends on ("Unknown + // secret storage found: 'local_file'") — so the directory pinning plus the + // recycle wipe is how workers stay clean. + PinSecretDirectory bool + // MemoryLimit is the DuckDB memory_limit per session (e.g., "4GB"). // If empty, auto-detected from system memory. MemoryLimit string @@ -843,6 +859,36 @@ func DuckDBDSN(cfg Config, username string) (string, error) { return dsn, nil } +// SecretDirectory returns the directory DuckDB should use for persistent +// secrets for this instance, or "" to leave DuckDB's default in place. +// +// DuckDB defaults persistent secrets to $HOME/.duckdb/stored_secrets, +// independent of the database file. On a worker that means a CREATE PERSISTENT +// SECRET lands in whatever $HOME the process happens to have and survives +// across restarts on any non-ephemeral disk — long after the in-memory secret +// of the same name (recreated each activation) gets re-added, which is exactly +// what produces DuckDB's "secret occurs in multiple storage backends" +// ambiguity errors. Pinning it under DataDir makes the location deterministic +// and, crucially, wipeable on worker recycle (see duckdbservice.Warmup). +func SecretDirectory(cfg Config) string { + if cfg.DataDir == "" { + return "" + } + return filepath.Join(cfg.DataDir, "secrets") +} + +// LegacySecretDirectory returns DuckDB's pre-pinning default persistent-secret +// location for a worker whose HOME is its DataDir (/.duckdb/stored_secrets). +// This is where secrets accumulated before SecretDirectory pinning existed, and +// the only reason it's a named helper is so the recycle wipe and this derivation +// stay colocated and can't drift apart. Returns "" when DataDir is unset. +func LegacySecretDirectory(cfg Config) string { + if cfg.DataDir == "" { + return "" + } + return filepath.Join(cfg.DataDir, ".duckdb", "stored_secrets") +} + // ConfigureMainDB applies the per-instance DuckDB settings (threads, memory, // temp dir, extensions, profiling) that the client-query DB needs. Shared // between openBaseDB (single-DB path) and OpenDuckDBPair (shared-connector @@ -884,6 +930,28 @@ func ConfigureMainDB(db *sql.DB, cfg Config, username string) error { return fmt.Errorf("failed to configure extension_directory: %w", err) } + // Pin the persistent-secret directory under DataDir on workers + // (PinSecretDirectory, set by duckdbservice.OpenDuckDBPair). secret_directory + // is a global DuckDB setting, and this must run before the SecretManager's + // first use — it does, since ConfigureMainDB is applied right after open, + // before any ATTACH or CREATE SECRET. Standalone is left at DuckDB's $HOME + // default so an upgrade doesn't relocate an existing user's secrets. + // + // We intentionally do NOT disable persistent secrets here + // (allow_persistent_secrets=false): that unregisters DuckDB's local_file + // secret storage, which the DuckLake ATTACH path requires. Pinning + + // the recycle wipe (see duckdbservice.wipePersistedSecrets) is what keeps + // workers from accumulating stale persisted secrets. + if cfg.PinSecretDirectory { + if secretDir := SecretDirectory(cfg); secretDir != "" { + if _, err := db.Exec(fmt.Sprintf("SET secret_directory = '%s'", secretDir)); err != nil { + slog.Warn("Failed to set DuckDB secret_directory.", "secret_directory", secretDir, "error", err) + } else { + slog.Debug("Set DuckDB secret_directory.", "secret_directory", secretDir) + } + } + } + // Load configured extensions if err := LoadExtensions(db, cfg.Extensions); err != nil { slog.Warn("Failed to load some extensions.", "user", username, "error", err)