Files
paliad/internal/handlers/paliadin.go
m 97a412498d feat(t-paliad-155): real Claude SKILL.md + per-user tmux session
Move Paliadin's persona + response protocol from a tmux-keystroke-injected
system prompt into a real Claude skill at ~/.claude/skills/paliadin/SKILL.md
(repo source: scripts/skills/paliadin/SKILL.md, install script:
scripts/install-paliadin-skill). Claude's skill router auto-matches the
[PALIADIN:<uuid>] envelope on every turn, so the protocol contract
survives /clear, fresh sessions, and pane restarts — root-cause fix for
the post-/clear stuck-spinner that triggered this task.

Per-user tmux session keying: each Paliad user gets a session named
<prefix>-<userid8> (first 8 hex chars of UUID). One persistent session
per user, conversation history accumulates per visit, ResetSession kills
the session entirely. Health-check cache becomes per-session.

Service-side simplifications:
- paliadin_prompt.go (paliadinSystemPrompt) deleted; trailer parser stays
  in paliadin.go.
- paliadin_remote.go: ensureBootstrapped removed; healthGate takes a
  session arg + caches per-key; ResetSession derives session from UserID
  and shells out to 'reset <session>'.
- paliadin.go (LocalPaliadinService): per-user pane cache, ensurePane
  takes UserID, no more in-process system-prompt send.
- Paliadin interface: ResetSession now takes UserID.

Shim refactor (scripts/paliadin-shim):
- All verbs accept the tmux session as their first positional arg.
- 'bootstrap' verb removed (skill replaces it).
- 'reset' kills the named session via tmux kill-session.
- Session name validated against [A-Za-z0-9_.-]{1,64}.

Env var rename: PALIADIN_TMUX_SESSION -> PALIADIN_SESSION_PREFIX (semantic
shift from literal session name to per-user prefix); CLAUDE.md updated.

Tests cover per-session health caching, session-name derivation,
ResetSession kill-session shape, and health-cache eviction on reset.
2026-05-08 12:42:57 +02:00

355 lines
11 KiB
Go

package handlers
// paliadin.go — HTTP/SSE handlers for the Paliadin PoC (t-paliad-146).
//
// Design: docs/design-paliadin-2026-05-07.md §0.5.
//
// Routes served from this file (chat panel, turn API, admin monitoring):
// GET /paliadin — chat panel page shell
// POST /api/paliadin/turn — initiate a turn, returns {turn_id, sse_url}
// GET /api/paliadin/stream/{id} — SSE stream of the turn
// POST /api/paliadin/reset — kill the caller's tmux session (fresh conversation)
// GET /admin/paliadin — monitoring dashboard (global_admin)
// GET /api/admin/paliadin/stats — stats JSON
// GET /api/admin/paliadin/turns — recent turns JSON
//
// Routes register only when the PaliadinService is wired (which only
// happens when PALIADIN_ENABLED=true at boot). On prod, where it's
// false by default, none of these URLs exist — they 404 like any
// unrouted path.
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
"github.com/google/uuid"
"mgit.msbls.de/m/paliad/internal/services"
)
// newDetachedContext builds a timeout-bound context rooted at
// context.Background() rather than at an incoming request's context, so
// the Claude-via-tmux turn is not cancelled when the originating POST
// returns ahead of the SSE stream.
//
// The caller owns the returned CancelFunc and should call it once the
// work is finished.
func newDetachedContext(timeout time.Duration) (context.Context, context.CancelFunc) {
	root := context.Background()
	ctx, cancel := context.WithTimeout(root, timeout)
	return ctx, cancel
}
// paliadinSvc is the package-level Paliadin backend that every handler
// in this file calls into. requirePaliadinOwner treats a nil value as
// "feature not wired" and answers 404.
//
// It is nil when DATABASE_URL was unset (the service depends on the
// audit table) and is set by Register() at boot. The concrete type is
// decided in cmd/server/main.go: local-tmux PoC, remote-via-SSH
// (mRiver), or a disabled stub.
//
// NOTE(review): the file header ties route registration to
// PALIADIN_ENABLED=true — confirm which conditions actually leave this
// variable nil.
var paliadinSvc services.Paliadin
// requirePaliadinOwner is the shared gate in front of every Paliadin
// handler. It reports true only when the service is wired, the request
// resolves to a user, and paliadinSvc.IsOwner confirms that user as the
// single owner (services.PaliadinOwnerEmail).
//
// When it reports false the caller must simply return:
//   - service nil or caller not the owner: 404, so the route looks
//     unrouted rather than forbidden and a curious colleague cannot
//     confirm it is wired;
//   - ownership lookup failed: 500 with a JSON error body;
//   - requireUser failed: nothing is written here; requireUser (defined
//     elsewhere) is presumably responsible for the response — verify.
func requirePaliadinOwner(w http.ResponseWriter, r *http.Request) bool {
	if paliadinSvc == nil {
		http.NotFound(w, r)
		return false
	}

	userID, authed := requireUser(w, r)
	if !authed {
		return false
	}

	isOwner, lookupErr := paliadinSvc.IsOwner(r.Context(), userID)
	switch {
	case lookupErr != nil:
		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": lookupErr.Error()})
		return false
	case !isOwner:
		http.NotFound(w, r)
		return false
	}
	return true
}
// pendingTurns is an in-memory map of turn_id → event channel, guarded
// by pendingMu. handlePaliadinTurn (POST /api/paliadin/turn) registers a
// buffered channel here before starting the turn goroutine;
// handlePaliadinStream (GET /api/paliadin/stream/{id}) looks the channel
// up, relays its events as SSE, and deletes the entry once it observes
// the channel closed.
//
// PoC scope: single user, in-process state. Production v1 would use a
// Postgres-backed queue or pgnotify.
var (
// pendingMu must be held for every read or write of pendingTurns.
pendingMu sync.Mutex
// pendingTurns maps a turn ID to the channel its goroutine writes into.
pendingTurns = map[uuid.UUID]chan turnEvent{}
)
// turnEvent is one SSE event for a turn-in-flight. The turn goroutine
// produces these and handlePaliadinStream relays them: Kind becomes the
// SSE "event:" field and Data alone is JSON-encoded into the "data:"
// field.
type turnEvent struct {
// Kind is the event name. Values emitted in this file:
// meta | content | end | error.
Kind string `json:"kind"`
// Data is the event payload; its keys depend on Kind.
Data map[string]any `json:"data"`
}
// turnRequest is the JSON body of POST /api/paliadin/turn.
type turnRequest struct {
// UserMessage is required; handlePaliadinTurn rejects an empty value
// with 400.
UserMessage string `json:"user_message"`
// SessionID is optional; handlePaliadinTurn substitutes a fresh UUID
// when it is empty.
SessionID string `json:"session_id"`
// PageOrigin is optional and forwarded unchanged to the service.
PageOrigin string `json:"page_origin,omitempty"`
}
// turnResponse is what POST /api/paliadin/turn returns.
type turnResponse struct {
// TurnID is the string form of the UUID generated for this turn.
TurnID string `json:"turn_id"`
// SSEURL is the stream path for this turn:
// "/api/paliadin/stream/" followed by TurnID.
SSEURL string `json:"sse_url"`
}
// handlePaliadinPage answers GET /paliadin with the static chat panel
// shell (dist/paliadin.html). Only the Paliadin owner passes the gate;
// for everyone else requirePaliadinOwner has already produced the
// response (a 404 for authenticated non-owners), so nothing more is
// written here.
func handlePaliadinPage(w http.ResponseWriter, r *http.Request) {
	if requirePaliadinOwner(w, r) {
		http.ServeFile(w, r, "dist/paliadin.html")
	}
}
// handleAdminPaliadinPage answers GET /admin/paliadin with the static
// monitoring page (dist/admin-paliadin.html). It sits behind the same
// owner gate as the chat panel, so even other global_admins cannot see
// this surface.
func handleAdminPaliadinPage(w http.ResponseWriter, r *http.Request) {
	if requirePaliadinOwner(w, r) {
		http.ServeFile(w, r, "dist/admin-paliadin.html")
	}
}
// handlePaliadinTurn handles POST /api/paliadin/turn: it validates the
// JSON body, registers a per-turn event channel, starts the turn in the
// background, and immediately replies with the turn ID and the SSE URL
// to subscribe to.
//
// The handler never waits for Claude. The goroutine pushes events into
// the channel while the client opens an EventSource on the returned URL
// and reads them as they arrive.
//
// Responses: 400 for an undecodable body or an empty user_message,
// otherwise 200 with a turnResponse.
func handlePaliadinTurn(w http.ResponseWriter, r *http.Request) {
	if !requirePaliadinOwner(w, r) {
		return
	}
	// The ok flag is ignored on purpose: requirePaliadinOwner has already
	// validated the user for this request.
	uid, _ := requireUser(w, r)

	var body turnRequest
	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid JSON"})
		return
	}
	if len(body.UserMessage) == 0 {
		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "user_message required"})
		return
	}

	// A client that sends no session ID gets a freshly generated one.
	sessionID := body.SessionID
	if sessionID == "" {
		sessionID = uuid.New().String()
	}

	// Register the channel before the goroutine starts so the stream
	// endpoint can find it as soon as the client has the URL.
	id := uuid.New()
	events := make(chan turnEvent, 16)
	pendingMu.Lock()
	pendingTurns[id] = events
	pendingMu.Unlock()

	// The turn deliberately does not inherit r.Context(): this request
	// returns as soon as the SSE URL is handed back, and that must not
	// cancel the turn.
	job := services.TurnRequest{
		UserID:      uid,
		SessionID:   sessionID,
		UserMessage: body.UserMessage,
		PageOrigin:  body.PageOrigin,
	}
	go runPaliadinTurnAsync(id, job, events)

	idText := id.String()
	writeJSON(w, http.StatusOK, turnResponse{
		TurnID: idText,
		SSEURL: "/api/paliadin/stream/" + idText,
	})
}
// runPaliadinTurnAsync executes one turn and reports its progress as
// events on ch. It is meant to run in its own goroutine and uses a
// 2-minute hard timeout that is independent of the originating request.
//
// Event sequence: one "meta" event up front, then either a single
// "error" event, or a "content" event followed by an "end" event. The
// channel is always closed on return; the close is what tells the SSE
// handler the turn is over.
//
// After closing, the turn's pendingTurns entry is evicted once a
// retention window has passed. The SSE handler normally removes the
// entry itself when it sees the close, but it never gets there if the
// client does not open the stream or disconnects mid-turn; without this
// eviction those entries (and their buffered events) would stay in the
// map for the life of the process.
func runPaliadinTurnAsync(turnID uuid.UUID, req services.TurnRequest, ch chan<- turnEvent) {
	// How long a finished turn remains subscribable for a late or
	// reconnecting client before its map entry is dropped.
	const retention = 5 * time.Minute

	defer func() {
		// Close without draining: events already buffered stay readable
		// after the close, and the SSE handler reads until it sees it.
		close(ch)
		// Deleting a key that the SSE handler already removed is a
		// no-op, so this is safe on the normal path too.
		time.AfterFunc(retention, func() {
			pendingMu.Lock()
			delete(pendingTurns, turnID)
			pendingMu.Unlock()
		})
	}()

	// Send a meta event so the client can show "Paliadin denkt nach …"
	send(ch, turnEvent{
		Kind: "meta",
		Data: map[string]any{
			"turn_id":    turnID.String(),
			"started_at": time.Now().UTC().Format(time.RFC3339),
		},
	})

	ctx, cancel := newDetachedContext(120 * time.Second)
	defer cancel()

	result, err := paliadinSvc.RunTurn(ctx, req)
	if err != nil {
		// Give the client a distinct code when tmux itself is unavailable.
		errCode := "upstream_error"
		if errors.Is(err, services.ErrTmuxUnavailable) {
			errCode = "tmux_unavailable"
		}
		send(ch, turnEvent{
			Kind: "error",
			Data: map[string]any{"code": errCode, "message": err.Error()},
		})
		return
	}

	// One-shot content event with the full body. The frontend simulates
	// streaming with a typewriter effect (cf. design §0.5.5: real
	// chunked streaming would require Claude to write the response file
	// progressively — out of PoC scope).
	send(ch, turnEvent{
		Kind: "content",
		Data: map[string]any{"text": result.Response},
	})
	send(ch, turnEvent{
		Kind: "end",
		Data: map[string]any{
			"turn_id":        turnID.String(),
			"used_tools":     result.UsedTools,
			"rows_seen":      result.RowsSeen,
			"chip_count":     result.ChipCount,
			"classifier_tag": result.ClassifierTag,
			"duration_ms":    result.DurationMS,
		},
	})
}
// handlePaliadinStream is the SSE endpoint the EventSource subscribes to
// (GET /api/paliadin/stream/{id}). It looks up the turn's channel and
// relays each turnEvent as an SSE frame: Kind becomes the "event:" field
// and the JSON-encoded Data becomes the "data:" field. A "ping" frame is
// written every 25 seconds as a heartbeat.
//
// Responses before streaming starts: 400 for a malformed turn ID, 404
// for an ID with no pending turn, 500 when the ResponseWriter cannot
// flush. Once streaming, the handler returns when the channel is closed
// (removing the turn from pendingTurns) or when the client disconnects.
func handlePaliadinStream(w http.ResponseWriter, r *http.Request) {
	if !requirePaliadinOwner(w, r) {
		return
	}

	turnID, err := uuid.Parse(r.PathValue("id"))
	if err != nil {
		http.Error(w, "invalid turn_id", http.StatusBadRequest)
		return
	}

	pendingMu.Lock()
	ch, ok := pendingTurns[turnID]
	pendingMu.Unlock()
	if !ok {
		http.Error(w, "unknown turn_id (already finished, or never started)", http.StatusNotFound)
		return
	}

	// Check for streaming support before touching the headers, so the
	// plain-text 500 below is not sent with event-stream headers set.
	flusher, ok := w.(http.Flusher)
	if !ok {
		http.Error(w, "streaming unsupported", http.StatusInternalServerError)
		return
	}

	// SSE headers.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache, no-transform")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no") // disable nginx/Traefik buffering

	// Heartbeat ticker keeps reverse proxies from reaping the connection.
	heartbeat := time.NewTicker(25 * time.Second)
	defer heartbeat.Stop()

	for {
		select {
		case <-r.Context().Done():
			// Client closed the connection. Do not drain: the turn
			// goroutine keeps writing into the buffered channel, and the
			// pendingTurns entry is left untouched by this path so a
			// later request for the same turn ID can still find it.
			return
		case <-heartbeat.C:
			fmt.Fprint(w, "event: ping\ndata: {}\n\n")
			flusher.Flush()
		case ev, more := <-ch:
			if !more {
				// Goroutine finished. Tidy up the pending-turns map.
				pendingMu.Lock()
				delete(pendingTurns, turnID)
				pendingMu.Unlock()
				return
			}
			payload, err := json.Marshal(ev.Data)
			if err != nil {
				// Keep the frame parseable for the client rather than
				// emitting an empty data field.
				payload = []byte("{}")
			}
			fmt.Fprintf(w, "event: %s\ndata: %s\n\n", ev.Kind, payload)
			flusher.Flush()
			// No early return on "end"/"error": keep looping until the
			// channel closes so the cleanup branch above runs and the
			// client gets a clean EOF.
		}
	}
}
// handlePaliadinReset handles POST /api/paliadin/reset. It asks the
// service to reset the caller's session, which per t-paliad-155 kills
// that user's Paliadin tmux session so the next turn boots a fresh
// claude pane.
//
// The reset runs under its own 10-second context instead of the request
// context. Responses: 200 {"ok": true} on success, 500 with a
// "reset failed: …" error body otherwise.
func handlePaliadinReset(w http.ResponseWriter, r *http.Request) {
	if !requirePaliadinOwner(w, r) {
		return
	}
	// The ok flag is ignored on purpose: requirePaliadinOwner has already
	// validated the user for this request.
	uid, _ := requireUser(w, r)

	ctx, cancel := newDetachedContext(10 * time.Second)
	defer cancel()

	resetErr := paliadinSvc.ResetSession(ctx, uid)
	if resetErr == nil {
		writeJSON(w, http.StatusOK, map[string]bool{"ok": true})
		return
	}
	writeJSON(w, http.StatusInternalServerError, map[string]string{
		"error": "reset failed: " + resetErr.Error(),
	})
}
// =============================================================================
// /admin/paliadin — monitoring dashboard.
// =============================================================================
// handleAdminPaliadinStats handles GET /api/admin/paliadin/stats and
// writes the service's aggregate stats for the dashboard as JSON.
// Owner-gated; a service failure is reported as 500 with a JSON error
// body.
func handleAdminPaliadinStats(w http.ResponseWriter, r *http.Request) {
	if !requirePaliadinOwner(w, r) {
		return
	}
	// The ok flag is ignored: requirePaliadinOwner has already validated
	// the user for this request.
	uid, _ := requireUser(w, r)

	stats, statsErr := paliadinSvc.Stats(r.Context(), uid)
	if statsErr == nil {
		writeJSON(w, http.StatusOK, stats)
		return
	}
	writeJSON(w, http.StatusInternalServerError, map[string]string{"error": statsErr.Error()})
}
// handleAdminPaliadinTurns handles GET /api/admin/paliadin/turns and
// writes the most recent turn rows as JSON, asking the service for at
// most 50. Owner-gated; a service failure is reported as 500 with a JSON
// error body.
func handleAdminPaliadinTurns(w http.ResponseWriter, r *http.Request) {
	// Upper bound on the number of rows requested from the service.
	const recentTurnLimit = 50

	if !requirePaliadinOwner(w, r) {
		return
	}
	// The ok flag is ignored: requirePaliadinOwner has already validated
	// the user for this request.
	uid, _ := requireUser(w, r)

	rows, listErr := paliadinSvc.ListRecentTurns(r.Context(), uid, recentTurnLimit)
	if listErr == nil {
		writeJSON(w, http.StatusOK, rows)
		return
	}
	writeJSON(w, http.StatusInternalServerError, map[string]string{"error": listErr.Error()})
}
// =============================================================================
// helpers.
// =============================================================================
// send offers ev to ch without ever blocking the caller. If the channel
// cannot accept the event right now (its buffer is full), the event is
// silently discarded.
//
// PoC scope: the per-turn channel is created with a 16-slot buffer and
// has a single subscriber, so overflow is very unlikely even on the slow
// Claude-via-tmux path. Turn completion is signalled by the channel
// being closed, so a dropped event does not leave the subscriber hanging.
func send(ch chan<- turnEvent, ev turnEvent) {
	select {
	case ch <- ev:
		return
	default:
	}
	// Buffer full: the event is dropped on purpose rather than stalling
	// the turn goroutine.
}