Replaces the golang-migrate single-counter tracker with a hand-rolled runner over embed.FS that tracks applied state as a set in paliad.applied_migrations (version PK, name, applied_at, checksum). Closes the parallel-merge skip-hole the 2026-05-20 mig-103 incident exposed (m/paliad#44): a migration whose version is missing from applied_migrations runs on the next deploy regardless of which higher versions are already applied. Gaps are first-class. Slice 1 of the design at docs/design-migration-runner-applied-set-2026-05-20.md. All eight design decisions m-picked = inventor recommendation. Runner contract: - Ensure paliad schema → pg_advisory_lock(hash('paliad.applied_migrations')) → CREATE TABLE IF NOT EXISTS applied_migrations. - bootstrapFromLegacyTracker: if applied_migrations is empty and the legacy paliad.paliad_schema_migrations row is present and clean, INSERT rows 1..N for every on-disk version with checksum=NULL via ON CONFLICT DO NOTHING. Hard-fail if legacy tracker is dirty (operator must recover). - scanEmbeddedMigrations: hard-fail on two .up.sql files sharing a version prefix — the failure mode the post-mortem exposed. - checkNameAgreement: hard-fail on rename-after-apply mismatch (disk name for an already-applied version != DB name). - applyOne: SQL body + INSERT(version, name, now(), sha256(file_bytes)) in one transaction. All-or-nothing per migration. Checksums populated on apply for future drift detection; rows backfilled from the legacy tracker carry NULL (we can't fabricate a hash for what golang-migrate applied historically). Verify-on-deploy intentionally deferred to a focused follow-up — single if-block flip when m wants it. Up-only runner. .down.sql files stay in embed.FS as reference; manual roll-back path is psql + DELETE FROM paliad.applied_migrations WHERE version=N. Zero call sites for migrate.Down in the codebase today. Drops github.com/golang-migrate/migrate/v4 from go.mod (no other importers; verified via grep). 
Tests: - internal/db/migrate_test.go: TestMigrations_DryRun walks pending = on_disk \ applied (read from paliad.applied_migrations, missing-table → empty set), runs each in BEGIN/ROLLBACK against the scratch DB. - cmd/server/main_smoke_test.go: TestBootSmoke asserts the applied set equals the on-disk set exactly (not just max-version-match) — catches the skip class the post-mortem documented. Dirty-flag check removed (rows are committed or absent, not 'dirty'). - All 45 service-test call sites of db.ApplyMigrations work unchanged (same signature, same fresh-DB behavior). Follow-up: mig 108_drop_legacy_trackers (DROP paliad.paliad_schema_migrations and public.paliad_schema_migrations) after one or two deploys of burn-in on this slice.
211 lines
6.8 KiB
Go
211 lines
6.8 KiB
Go
// Boot smoke test — assert paliad reaches a serving state.
|
|
//
|
|
// Three checks against TEST_DATABASE_URL:
|
|
//
|
|
// 1. db.ApplyMigrations does not panic and returns nil.
|
|
// 2. paliad.applied_migrations covers every on-disk *.up.sql — no
|
|
// migration was silently skipped, no version is missing. The set
|
|
// contract is stronger than the old single-counter check: applied
|
|
// set must EQUAL on-disk set, not just reach the max version.
|
|
// 3. The handler mux (with /healthz mounted) responds 200 to GET /healthz.
|
|
//
|
|
// This is the lightweight cousin of the migration dry-run gate
|
|
// (internal/db/migrate_test.go): the dry-run catches per-migration syntax
|
|
// errors before merge; this smoke confirms the apply+bind path the
|
|
// container actually runs at boot. Together they cover the mig-098 /
|
|
// mig-099 class of crash-loops end-to-end, plus the mig-103 parallel-merge
|
|
// skip-hole that t-paliad-218 closed (m/paliad#44).
|
|
//
|
|
// Skipped without TEST_DATABASE_URL — matches the rest of the live-DB tests.
|
|
//
|
|
// Design: docs/design-paliad-test-strategy-2026-05-19.md §5 Slice 1 and
|
|
// docs/design-migration-runner-applied-set-2026-05-20.md §6.
|
|
|
|
package main
|
|
|
|
import (
|
|
"database/sql"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
|
|
_ "github.com/lib/pq"
|
|
|
|
"mgit.msbls.de/m/paliad/internal/auth"
|
|
"mgit.msbls.de/m/paliad/internal/db"
|
|
"mgit.msbls.de/m/paliad/internal/handlers"
|
|
)
|
|
|
|
func TestBootSmoke(t *testing.T) {
|
|
url := os.Getenv("TEST_DATABASE_URL")
|
|
if url == "" {
|
|
t.Skip("TEST_DATABASE_URL not set — skipping boot smoke")
|
|
}
|
|
|
|
// (1) Apply migrations end-to-end. The same code path the prod
|
|
// container runs at boot before `http.ListenAndServe`. A regression
|
|
// like mig-098's digit-regex would surface here as a non-nil error.
|
|
if err := db.ApplyMigrations(url); err != nil {
|
|
t.Fatalf("db.ApplyMigrations: %v", err)
|
|
}
|
|
|
|
// (2) Assert the applied set equals the on-disk set. The new runner
|
|
// tracks applied state per-migration; a silently-skipped version
|
|
// would surface as a row missing from paliad.applied_migrations even
|
|
// though max(version) matches. Comparing sets — not just max —
|
|
// catches the failure mode the t-paliad-218 post-mortem documented.
|
|
onDisk := embeddedMigrationVersions(t)
|
|
applied := appliedMigrationVersions(t, url)
|
|
|
|
if missing := setDiff(onDisk, applied); len(missing) > 0 {
|
|
t.Errorf("paliad.applied_migrations missing %d on-disk versions: %v "+
|
|
"(a migration was skipped — investigate before deploying)",
|
|
len(missing), missing)
|
|
}
|
|
if extra := setDiff(applied, onDisk); len(extra) > 0 {
|
|
t.Errorf("paliad.applied_migrations has %d versions with no on-disk file: %v "+
|
|
"(orphan rows — either restore the file or DELETE the row)",
|
|
len(extra), extra)
|
|
}
|
|
|
|
// (3) Mount the public handlers (the same Register call main() makes,
|
|
// minus the DB-backed Services bundle which the /healthz route doesn't
|
|
// need) and assert /healthz returns 200. This is the bind-and-serve
|
|
// half of the smoke: catches a regression that would make /healthz
|
|
// 404 or break the mux registration order.
|
|
//
|
|
// We deliberately do not boot the full main() — that would require
|
|
// SUPABASE_URL, SUPABASE_ANON_KEY, SUPABASE_JWT_SECRET, an open
|
|
// listening socket and a real auth client. The /healthz handler is
|
|
// auth-independent by design, and Register registers it on the outer
|
|
// mux before any DB-backed route, so this minimal setup exercises the
|
|
// exact code path main() takes.
|
|
mux := http.NewServeMux()
|
|
authClient := auth.NewClient("https://test.invalid", "anon-key", []byte("test-secret"))
|
|
handlers.Register(mux, authClient, "", nil)
|
|
|
|
rec := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
|
|
mux.ServeHTTP(rec, req)
|
|
if rec.Code != http.StatusOK {
|
|
t.Errorf("GET /healthz: status=%d, body=%q; want 200 OK", rec.Code, rec.Body.String())
|
|
}
|
|
if body := strings.TrimSpace(rec.Body.String()); body != "ok" {
|
|
t.Errorf("GET /healthz: body=%q; want \"ok\"", body)
|
|
}
|
|
}
|
|
|
|
// embeddedMigrationVersions returns every N where N_*.up.sql exists in
|
|
// internal/db/migrations/ on disk. The boot smoke compares this set
|
|
// against paliad.applied_migrations to detect skipped or orphan
|
|
// migrations.
|
|
//
|
|
// Read from disk (not the embed.FS inside the db package — it's unexported)
|
|
// since the test runs from the repo. The two views must agree for the
|
|
// build to be self-consistent; if they diverge, the smoke test is the
|
|
// wrong place to learn about it (the build is). We trust them to match.
|
|
func embeddedMigrationVersions(t *testing.T) []int {
|
|
t.Helper()
|
|
root, err := repoRoot()
|
|
if err != nil {
|
|
t.Fatalf("locate repo root: %v", err)
|
|
}
|
|
dir := filepath.Join(root, "internal", "db", "migrations")
|
|
entries, err := os.ReadDir(dir)
|
|
if err != nil {
|
|
t.Fatalf("read migrations dir %s: %v", dir, err)
|
|
}
|
|
var versions []int
|
|
for _, e := range entries {
|
|
name := e.Name()
|
|
if !strings.HasSuffix(name, ".up.sql") {
|
|
continue
|
|
}
|
|
base := strings.TrimSuffix(name, ".up.sql")
|
|
underscore := strings.IndexByte(base, '_')
|
|
if underscore <= 0 {
|
|
continue
|
|
}
|
|
v, err := strconv.Atoi(base[:underscore])
|
|
if err != nil {
|
|
continue
|
|
}
|
|
versions = append(versions, v)
|
|
}
|
|
if len(versions) == 0 {
|
|
t.Fatalf("no *.up.sql files found in %s", dir)
|
|
}
|
|
sort.Ints(versions)
|
|
return versions
|
|
}
|
|
|
|
// appliedMigrationVersions opens a connection to url with the "postgres"
// driver, selects every version from paliad.applied_migrations ordered by
// version, and returns them in that order. Any failure — opening the
// handle, running the query (for instance because the table is missing,
// which db.ApplyMigrations should already have created), scanning a row,
// or iterating — fails the test immediately.
func appliedMigrationVersions(t *testing.T, url string) []int {
	t.Helper()

	pool, err := sql.Open("postgres", url)
	if err != nil {
		t.Fatalf("open: %v", err)
	}
	defer pool.Close()

	const query = `SELECT version FROM paliad.applied_migrations ORDER BY version`
	result, err := pool.Query(query)
	if err != nil {
		t.Fatalf("read applied_migrations: %v", err)
	}
	defer result.Close()

	var versions []int
	for result.Next() {
		var version int
		if scanErr := result.Scan(&version); scanErr != nil {
			t.Fatalf("scan: %v", scanErr)
		}
		versions = append(versions, version)
	}
	// Next returning false can mean an iteration error rather than the end
	// of the result set, so check before trusting the collected versions.
	if iterErr := result.Err(); iterErr != nil {
		t.Fatalf("rows: %v", iterErr)
	}
	return versions
}
|
|
|
|
// setDiff reports the elements of a that do not occur in b. The result
// keeps a's order (and any duplicates in a), so sorted input yields sorted
// output. It returns nil when every element of a is present in b.
func setDiff(a, b []int) []int {
	exclude := make(map[int]struct{}, len(b))
	for i := range b {
		exclude[b[i]] = struct{}{}
	}

	var diff []int
	for i := range a {
		if _, drop := exclude[a[i]]; drop {
			continue
		}
		diff = append(diff, a[i])
	}
	return diff
}
|
|
|
|
// repoRoot returns the nearest directory, starting at the current working
// directory and moving toward the filesystem root, that contains a go.mod.
// `go test` runs in the package directory, so the search usually has to
// climb a couple of levels. It returns the os.Getwd error if the working
// directory cannot be determined, and os.ErrNotExist if the filesystem root
// is reached without finding a go.mod.
func repoRoot() (string, error) {
	cwd, err := os.Getwd()
	if err != nil {
		return "", err
	}

	for cur := cwd; ; {
		if _, statErr := os.Stat(filepath.Join(cur, "go.mod")); statErr == nil {
			return cur, nil
		}
		// filepath.Dir returns its argument unchanged at the filesystem
		// root; that is the signal there is nowhere left to climb.
		up := filepath.Dir(cur)
		if up == cur {
			return "", os.ErrNotExist
		}
		cur = up
	}
}
|