From 3e1f4eee4b5b6e3533bdb399b9949aa8a3289333 Mon Sep 17 00:00:00 2001 From: m Date: Fri, 8 May 2026 13:19:27 +0200 Subject: [PATCH] fix(t-paliad-155): cold-start timeout headroom + ban DB fallbacks in skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shim's run-turn hard timeout: 60s → 120s (PALIADIN_TIMEOUT_S default). First turn after a fresh tmux session stacks claude boot + skill load + MCP discovery + first reasoning, which can blow past 60s before the response file lands. Aligned the surrounding timeouts so 120s is actually reachable: - callShim ctx (paliadin_remote.go): 70s → 130s (shim 120 + 10 SSH). - runPaliadinTurnAsync handler ctx: 120s → 150s (shim 120 + 10 SSH + 20 paliad-side overhead). SKILL.md hard rule #6 added: never fall back to psql / curl PostgREST / nix-shell — mcp__supabase__execute_sql is the only DB tool. If it's unavailable, write a short 'DB nicht erreichbar — bitte paliad neu deployen oder PALIADIN_REMOTE_CWD prüfen' response immediately with classifier_tag=meta. Avoids the 60s fallback-dance failure mode that m hit on the cwd-misconfig turn. --- internal/handlers/paliadin.go | 6 ++++-- internal/services/paliadin_remote.go | 5 +++-- scripts/paliadin-shim | 2 +- scripts/skills/paliadin/SKILL.md | 1 + 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/internal/handlers/paliadin.go b/internal/handlers/paliadin.go index 2361216..08dc4c8 100644 --- a/internal/handlers/paliadin.go +++ b/internal/handlers/paliadin.go @@ -166,7 +166,9 @@ func handlePaliadinTurn(w http.ResponseWriter, r *http.Request) { } // runPaliadinTurnAsync executes the turn and writes events into ch. -// Uses a 2-minute hard timeout independently of the originating request. +// Uses a 150 s hard timeout independently of the originating request, +// which leaves headroom over the shim's 120 s run-turn cap + SSH +// overhead (t-paliad-155: cold-start safety for skill + MCP discovery). 
func runPaliadinTurnAsync(turnID uuid.UUID, req services.TurnRequest, ch chan<- turnEvent) { defer func() { // Drain + close. The SSE handler reads until the channel closes. @@ -182,7 +184,7 @@ func runPaliadinTurnAsync(turnID uuid.UUID, req services.TurnRequest, ch chan<- }, }) - ctx, cancel := newDetachedContext(120 * time.Second) + ctx, cancel := newDetachedContext(150 * time.Second) defer cancel() result, err := paliadinSvc.RunTurn(ctx, req) diff --git a/internal/services/paliadin_remote.go b/internal/services/paliadin_remote.go index bdf1d13..d335c45 100644 --- a/internal/services/paliadin_remote.go +++ b/internal/services/paliadin_remote.go @@ -261,8 +261,9 @@ func (s *RemotePaliadinService) callShim(ctx context.Context, args ...string) ([ } sshArgs = append(sshArgs, args...) - // Shim's run-turn timeout is 60 s; +10 s gives SSH some overhead. - c, cancel := context.WithTimeout(ctx, 70*time.Second) + // Shim's run-turn timeout is 120 s (cold start = claude boot + skill + // load + MCP discovery + first reasoning); +10 s gives SSH overhead. + c, cancel := context.WithTimeout(ctx, 130*time.Second) defer cancel() cmd := exec.CommandContext(c, "ssh", sshArgs...) diff --git a/scripts/paliadin-shim b/scripts/paliadin-shim index 6032069..dd1951a 100755 --- a/scripts/paliadin-shim +++ b/scripts/paliadin-shim @@ -30,7 +30,7 @@ set -euo pipefail umask 077 readonly RESPONSE_DIR="${PALIADIN_RESPONSE_DIR:-/tmp/paliadin}" -readonly TIMEOUT_S="${PALIADIN_TIMEOUT_S:-60}" +readonly TIMEOUT_S="${PALIADIN_TIMEOUT_S:-120}" # Working directory for the claude pane. Must be the paliad repo root so # claude picks up .mcp.json (project-scoped Supabase MCP) — without it, # the SKILL.md SQL recipes fail with no DB tool. 
Override via env var if diff --git a/scripts/skills/paliadin/SKILL.md b/scripts/skills/paliadin/SKILL.md index ffc9e4a..2ff2ef9 100644 --- a/scripts/skills/paliadin/SKILL.md +++ b/scripts/skills/paliadin/SKILL.md @@ -73,6 +73,7 @@ Nur IDs/Slugs benutzen, die du tatsächlich aus einem Tool-Call hast. **Niemals 3. **Read-only.** Schreibe nichts in die DB. Wenn m etwas ändern will, sag wo in Paliad. 4. **Visibility-Gate respektieren.** Auch wenn m global_admin ist: jede projekt-bezogene Abfrage MUSS `paliad.can_see_project(project_id)` enthalten. 5. **Nicht über andere User spekulieren** — frag nach Projekt-ID/Slug, selbst wenn m sie namentlich erwähnt. +6. **Niemals auf `psql`, `curl PostgREST`, `nix-shell` oder andere DB-Fallbacks ausweichen.** Die einzig zulässige DB-Quelle ist `mcp__supabase__execute_sql` (project-scoped MCP). Wenn dieser Tool-Aufruf nicht verfügbar ist, schreibe sofort: *"DB nicht erreichbar — bitte paliad neu deployen oder PALIADIN_REMOTE_CWD prüfen."* mit `classifier_tag: meta`. Niemals 60+ Sekunden im Fallback-Tanz verbringen — der Backend-Timeout schlägt sonst zu, bevor du eine Antwort schreibst. ## Beispiel — vollständige Antwortdatei