Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions duckdbservice/duckdb_pair.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ func (*nonClosingConnector) Close() error { return nil }
// path as openBaseDB (threads, memory_limit, extensions, profiling, cache
// settings); the Control DB is intentionally minimal.
func OpenDuckDBPair(cfg server.Config, username string) (*DuckDBPair, error) {
// This is the worker-process DB factory (standalone uses server.openBaseDB),
// so it's the single place to pin every worker's persistent-secret directory
// under DataDir. That makes the location deterministic and wipeable on
// recycle, and stops stale secrets in DuckDB's $HOME default from loading and
// colliding with the in-memory secrets re-created at activation.
// ConfigureMainDB (below) applies the setting. (We do NOT disable persistent
// secrets outright: that breaks the DuckLake ATTACH path, which needs the
// local_file secret storage registered.)
cfg.PinSecretDirectory = true

dsn, err := server.DuckDBDSN(cfg, username)
if err != nil {
return nil, err
Expand Down
125 changes: 125 additions & 0 deletions duckdbservice/secret_recycle_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package duckdbservice

import (
"database/sql"
"os"
"path/filepath"
"testing"

_ "github.com/duckdb/duckdb-go/v2"

"github.com/posthog/duckgres/server"
)

// TestWipePersistedSecretsOnRecycle is the unit-level guard for the
// worker-recycle behavior that Warmup relies on: a CREATE PERSISTENT SECRET
// left on disk by a prior worker incarnation must not survive into the next
// one. We do it here rather than in tests/k8s because the meaningful boundary
// is a *process restart over a surviving DataDir* (container restart within a
// pod, or a persistent volume / warm node). A tests/k8s pod-kill would be
// vacuous: worker DataDir is an EmptyDir, so pod replacement already yields a
// fresh empty /data and the test would pass even with the wipe removed.
//
// The test is explicitly non-vacuous: the middle phase asserts the persistent
// secret *does* survive a plain reopen (the original bug), and only the final
// phase — after wipePersistedSecrets — asserts it is gone (the fix).
func TestWipePersistedSecretsOnRecycle(t *testing.T) {
cfg := server.Config{DataDir: t.TempDir()}
secretDir := server.SecretDirectory(cfg)
if secretDir == "" {
t.Fatal("SecretDirectory returned empty for a non-empty DataDir")
}

const secretName = "test_recycle_secret"

// open returns a DB pinned at the same secret_directory the worker uses.
// We pin it directly here rather than via ConfigureMainDB(PinSecretDirectory)
// because this test exercises the wipe mechanism in isolation; pinning the
// directory is all that's needed to land a persistent secret where the wipe
// will look for it. Caller closes the DB.
open := func() *sql.DB {
t.Helper()
db, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true")
if err != nil {
t.Fatalf("open duckdb: %v", err)
}
if _, err := db.Exec("SET secret_directory = '" + secretDir + "'"); err != nil {
_ = db.Close()
t.Fatalf("set secret_directory: %v", err)
}
return db
}

persistentSecretCount := func(db *sql.DB) int {
t.Helper()
var n int
if err := db.QueryRow(
"SELECT count(*) FROM duckdb_secrets() WHERE name = ? AND persistent",
secretName,
).Scan(&n); err != nil {
t.Fatalf("query duckdb_secrets: %v", err)
}
return n
}

// Phase 1: create a persistent secret. It should land on disk under the
// pinned secret_directory.
db := open()
if _, err := db.Exec(
"CREATE PERSISTENT SECRET " + secretName + " (TYPE s3, KEY_ID 'a', SECRET 'b')",
); err != nil {
t.Fatalf("create persistent secret: %v", err)
}
if got := persistentSecretCount(db); got != 1 {
t.Fatalf("after create: persistent secret count = %d, want 1", got)
}
_ = db.Close()

entries, err := os.ReadDir(secretDir)
if err != nil {
t.Fatalf("read secret_directory: %v", err)
}
if len(entries) == 0 {
t.Fatalf("secret_directory %s is empty; persistent secret was not written to disk", secretDir)
}

// Also plant a file in the legacy default location (<DataDir>/.duckdb/
// stored_secrets) — where a worker whose HOME is its DataDir accumulated
// secrets before we pinned secret_directory. The recycle must clear this too.
legacyDir := server.LegacySecretDirectory(cfg)
if err := os.MkdirAll(legacyDir, 0o750); err != nil {
t.Fatalf("mkdir legacy secret dir: %v", err)
}
legacyFile := filepath.Join(legacyDir, "old_secret.duckdb_secret")
if err := os.WriteFile(legacyFile, []byte("stale"), 0o600); err != nil {
t.Fatalf("write legacy secret file: %v", err)
}

// Phase 2 (non-vacuous control): reopen WITHOUT recycling. The persistent
// secret must reload from disk — this is exactly the state that, combined
// with the in-memory secret re-created at activation, produces the
// "secret occurs in multiple storage backends" ambiguity in production.
db = open()
if got := persistentSecretCount(db); got != 1 {
t.Fatalf("after plain reopen: persistent secret count = %d, want 1 "+
"(test is vacuous if the secret didn't survive a reopen)", got)
}
_ = db.Close()

// Phase 3: recycle. This is the exact call Warmup makes on worker startup.
wipePersistedSecrets(cfg)
if _, err := os.Stat(secretDir); !os.IsNotExist(err) {
t.Fatalf("secret_directory still exists after wipe (stat err = %v)", err)
}
if _, err := os.Stat(legacyFile); !os.IsNotExist(err) {
t.Fatalf("legacy secret file still exists after wipe (stat err = %v)", err)
}

// Phase 4: reopen after recycle. The persistent secret must be gone.
db = open()
if got := persistentSecretCount(db); got != 0 {
t.Fatalf("after recycle: persistent secret count = %d, want 0 "+
"(wipePersistedSecrets did not prevent resurrection)", got)
}
_ = db.Close()
}
49 changes: 49 additions & 0 deletions duckdbservice/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,40 @@ func NewDuckDBService(cfg ServiceConfig) *DuckDBService {
}
}

// wipePersistedSecrets removes DuckDB's persistent-secret directories for this
// config. Called on worker startup so persisted secrets never survive a
// recycle. Best-effort: a failure is logged, not fatal — a stale secret is a
// correctness annoyance, not a reason to refuse to start. No-op when no DataDir
// is configured.
//
// We wipe two locations:
// - the pinned secret_directory (server.SecretDirectory), where new secrets
// would land under the current config, and
// - the legacy DuckDB default <DataDir>/.duckdb/stored_secrets, which is where
// secrets accumulated before we pinned the directory (a worker whose
// HOME is its DataDir resolves DuckDB's default there). Without this, the
// historical files that caused the original ambiguity would linger on a
// persistent DataDir forever.
func wipePersistedSecrets(cfg server.Config) {
if cfg.DataDir == "" {
return
}
dirs := []string{
server.SecretDirectory(cfg),
server.LegacySecretDirectory(cfg),
}
for _, dir := range dirs {
if dir == "" {
continue
}
if err := os.RemoveAll(dir); err != nil {
slog.Warn("Failed to wipe persisted secret directory on worker startup.", "secret_directory", dir, "error", err)
continue
}
slog.Info("Wiped persisted secret directory on worker startup.", "secret_directory", dir)
}
}

// Warmup performs one-time initialization of the shared DuckDB instance.
// This loads extensions and attaches catalogs so that subsequent session
// creations are nearly instantaneous.
Expand All @@ -166,6 +200,21 @@ func (p *SessionPool) Warmup() error {
return nil
}

// Recycle hook: a worker process starting up is the boundary between one
// tenant runtime and the next on this disk (a serverless/shared-warm worker
// is single-org-bound for its whole life, so process start == recycle).
// Wipe any persisted secrets left on disk before DuckDB's SecretManager
// reads them, so a CREATE PERSISTENT SECRET from a prior incarnation can't
// resurface and collide with the in-memory secret re-created at activation,
// and can't leak across tenants.
//
// This matters whenever DataDir survives across worker processes: a
// container restart within the same pod (EmptyDir survives that — it's only
// destroyed on pod deletion), or any persistent-volume / warm-node setup. On
// a fresh pod with a fresh EmptyDir the directory is already empty and this
// is a no-op.
wipePersistedSecrets(p.cfg)

start := time.Now()
slog.Info("Pre-warming worker DuckDB instance...")

Expand Down
11 changes: 11 additions & 0 deletions server/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,17 @@ func isDuckDBUtilityCommand(query string) bool {
"UNLOAD",
"CREATE SECRET",
"DROP SECRET",
// DuckDB's own disambiguation hint for a secret that exists in more
// than one storage backend is "DROP <PERSISTENT|TEMPORARY> SECRET ...".
// Those variants don't start with "DROP SECRET" (the next token is
// PERSISTENT/TEMPORARY), so without these explicit prefixes they fall
// through to the PG transpiler and die with `syntax error at or near
// "PERSISTENT"` — i.e. the exact command DuckDB tells the user to run is
// unrunnable. Pass them straight through. (List order is irrelevant:
// matching is by string prefix, and "DROP SECRET" is not a prefix of
// "DROP PERSISTENT SECRET", so the two never collide.)
"DROP PERSISTENT SECRET",
"DROP TEMPORARY SECRET",
"CREATE PERSISTENT SECRET",
"CREATE TEMPORARY SECRET",
"CREATE OR REPLACE SECRET",
Expand Down
37 changes: 37 additions & 0 deletions server/conn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,43 @@ import (
"github.com/posthog/duckgres/server/wire"
)

func TestIsDuckDBUtilityCommand(t *testing.T) {
tests := []struct {
name string
query string
want bool
}{
// Secret DDL must bypass the PG transpiler — DuckDB-only syntax.
{"create secret", `CREATE SECRET foo (TYPE s3)`, true},
{"create or replace secret", `CREATE OR REPLACE SECRET foo (TYPE s3)`, true},
{"drop secret", `DROP SECRET foo`, true},
{"drop secret if exists", `DROP SECRET IF EXISTS foo`, true},
// Regression: DuckDB's own ambiguity hint is "DROP <PERSISTENT|TEMPORARY>
// SECRET ...". These don't start with "DROP SECRET", so before the fix
// they fell through to pg_query and failed with a syntax error on the
// PERSISTENT/TEMPORARY keyword.
{"drop persistent secret", `DROP PERSISTENT SECRET foo`, true},
{"drop persistent secret if exists quoted", `DROP PERSISTENT SECRET IF EXISTS "portola_warehouse_prod_s3"`, true},
{"drop temporary secret", `DROP TEMPORARY SECRET foo`, true},
{"drop temporary secret if exists", `DROP TEMPORARY SECRET IF EXISTS foo`, true},
{"case insensitive", `drop persistent secret foo`, true},
{"leading comment", "-- cleanup\nDROP PERSISTENT SECRET foo", true},
// Not utility commands — must still go through the transpiler.
{"select", `SELECT * FROM duckdb_secrets()`, false},
{"drop table", `DROP TABLE foo`, false},
// Guard the word-boundary check: a name that merely starts with the
// keyword text must not be mistaken for the command.
{"drop secretive table", `DROP TABLE secretive`, false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := isDuckDBUtilityCommand(tt.query); got != tt.want {
t.Errorf("isDuckDBUtilityCommand(%q) = %v, want %v", tt.query, got, tt.want)
}
})
}
}

func TestIsEmptyQuery(t *testing.T) {
tests := []struct {
name string
Expand Down
110 changes: 110 additions & 0 deletions server/persistent_secrets_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package server

import (
"database/sql"
"os"
"path/filepath"
"testing"

_ "github.com/duckdb/duckdb-go/v2"
)

// TestPinSecretDirectory is the regression guard for the worker-side fix: with
// Config.PinSecretDirectory set (as duckdbservice.OpenDuckDBPair does for every
// worker), ConfigureMainDB redirects DuckDB's persistent-secret storage to
// <DataDir>/secrets. That means:
//
// (a) a CREATE PERSISTENT SECRET lands under <DataDir>/secrets (deterministic,
// wipeable), and
// (b) a stale secret sitting in DuckDB's $HOME default is no longer loaded, so
// it can't collide with the in-memory secret re-created at activation.
//
// We deliberately do NOT disable persistent secrets (that breaks DuckLake's
// ATTACH, which needs the local_file secret storage registered), so persistent
// secrets still work — they're just relocated and wiped on recycle.
func TestPinSecretDirectory(t *testing.T) {
dir := t.TempDir()
secretDir := SecretDirectory(Config{DataDir: dir})

db, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true")
if err != nil {
t.Fatalf("open duckdb: %v", err)
}
defer func() { _ = db.Close() }()

cfg := Config{DataDir: dir, PinSecretDirectory: true}
if err := ConfigureMainDB(db, cfg, "worker"); err != nil {
t.Fatalf("ConfigureMainDB: %v", err)
}

// (a) a persistent secret must still be creatable and must land under the
// pinned directory.
if _, err := db.Exec("CREATE PERSISTENT SECRET pinned (TYPE s3, KEY_ID 'a', SECRET 'b')"); err != nil {
t.Fatalf("create persistent secret: %v", err)
}
entries, err := os.ReadDir(secretDir)
if err != nil {
t.Fatalf("read pinned secret_directory %s: %v", secretDir, err)
}
if len(entries) == 0 {
t.Errorf("persistent secret did not land under pinned secret_directory %s", secretDir)
}
}

// TestPinSecretDirectoryIgnoresLegacyDefault proves point (b): a secret written
// to DuckDB's old $HOME-style default location is not loaded once the directory
// is pinned elsewhere.
func TestPinSecretDirectoryIgnoresLegacyDefault(t *testing.T) {
dir := t.TempDir()

// Seed a persistent secret into the *legacy* location.
legacy := LegacySecretDirectory(Config{DataDir: dir})
seed, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true")
if err != nil {
t.Fatalf("open seed duckdb: %v", err)
}
if _, err := seed.Exec("SET secret_directory = '" + legacy + "'"); err != nil {
t.Fatalf("seed set secret_directory: %v", err)
}
if _, err := seed.Exec("CREATE PERSISTENT SECRET stale (TYPE s3, KEY_ID 'a', SECRET 'b')"); err != nil {
t.Fatalf("seed create persistent secret: %v", err)
}
_ = seed.Close()

// A worker-style connection pins to <DataDir>/secrets, not the legacy path.
db, err := sql.Open("duckdb", ":memory:?allow_unsigned_extensions=true")
if err != nil {
t.Fatalf("open duckdb: %v", err)
}
defer func() { _ = db.Close() }()
if err := ConfigureMainDB(db, Config{DataDir: dir, PinSecretDirectory: true}, "worker"); err != nil {
t.Fatalf("ConfigureMainDB: %v", err)
}

var loaded int
if err := db.QueryRow("SELECT count(*) FROM duckdb_secrets() WHERE name = 'stale'").Scan(&loaded); err != nil {
t.Fatalf("query duckdb_secrets: %v", err)
}
if loaded != 0 {
t.Errorf("stale secret in the legacy location was loaded (count=%d); pinning should redirect away from it", loaded)
}
}

// TestSecretDirectory documents the path derivation the wipe and pinning rely on.
func TestSecretDirectory(t *testing.T) {
if got := SecretDirectory(Config{}); got != "" {
t.Errorf("SecretDirectory with empty DataDir = %q, want \"\"", got)
}
want := filepath.Join("/data", "secrets")
if got := SecretDirectory(Config{DataDir: "/data"}); got != want {
t.Errorf("SecretDirectory(/data) = %q, want %q", got, want)
}

if got := LegacySecretDirectory(Config{}); got != "" {
t.Errorf("LegacySecretDirectory with empty DataDir = %q, want \"\"", got)
}
wantLegacy := filepath.Join("/data", ".duckdb", "stored_secrets")
if got := LegacySecretDirectory(Config{DataDir: "/data"}); got != wantLegacy {
t.Errorf("LegacySecretDirectory(/data) = %q, want %q", got, wantLegacy)
}
}
Loading
Loading