Files
paliad/internal/services/paliadin.go
m 97a412498d feat(t-paliad-155): real Claude SKILL.md + per-user tmux session
Move Paliadin's persona + response protocol from a tmux-keystroke-injected
system prompt into a real Claude skill at ~/.claude/skills/paliadin/SKILL.md
(repo source: scripts/skills/paliadin/SKILL.md, install script:
scripts/install-paliadin-skill). Claude's skill router auto-matches the
[PALIADIN:<uuid>] envelope on every turn, so the protocol contract
survives /clear, fresh sessions, and pane restarts — root-cause fix for
the post-/clear stuck-spinner that triggered this task.

Per-user tmux session keying: each Paliad user gets a session named
<prefix>-<userid8> (first 8 hex chars of UUID). One persistent session
per user, conversation history accumulates per visit, ResetSession kills
the session entirely. Health-check cache becomes per-session.

Service-side simplifications:
- paliadin_prompt.go (paliadinSystemPrompt) deleted; trailer parser stays
  in paliadin.go.
- paliadin_remote.go: ensureBootstrapped removed; healthGate takes a
  session arg + caches per-key; ResetSession derives session from UserID
  and shells out to 'reset <session>'.
- paliadin.go (LocalPaliadinService): per-user pane cache, ensurePane
  takes UserID, no more in-process system-prompt send.
- Paliadin interface: ResetSession now takes UserID.

Shim refactor (scripts/paliadin-shim):
- All verbs accept the tmux session as their first positional arg.
- 'bootstrap' verb removed (skill replaces it).
- 'reset' kills the named session via tmux kill-session.
- Session name validated against [A-Za-z0-9_.-]{1,64}.

Env var rename: PALIADIN_TMUX_SESSION -> PALIADIN_SESSION_PREFIX (semantic
shift from literal session name to per-user prefix); CLAUDE.md updated.

Tests cover per-session health caching, session-name derivation,
ResetSession kill-session shape, and health-cache eviction on reset.
2026-05-08 12:42:57 +02:00

807 lines
29 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package services
// Paliadin — the in-app AI buddy. Two implementations of the same
// interface, picked at boot time (see cmd/server/main.go):
//
// - LocalPaliadinService — talks to a `claude` CLI in a local tmux
// session. The PoC path (t-paliad-146); used on m's laptop.
// - RemotePaliadinService — shells out to ssh on mRiver where the
// long-lived tmux+claude pane lives. The prod path (t-paliad-151);
// used by the paliad.de Dokploy container, which has no `claude`
// CLI of its own.
//
// Designs:
// - docs/design-paliadin-2026-05-07.md (PoC architecture)
// - docs/design-paliadin-tailscale-ssh-2026-05-07.md (remote routing)
//
// Both implementations share the audit-table I/O (paliadinDB) and the
// trailer parser. The conversation state (turn ordering, response file
// polling) is split: Local owns the tmux pane directly; Remote delegates
// to the paliadin-shim on mRiver and reads the file there.
import (
"bytes"
"context"
"database/sql"
"errors"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
"github.com/lib/pq"
)
// PaliadinOwnerEmail is the only account allowed to use the Paliadin
// PoC. Hardcoded — by design — so the gate cannot be flipped via a
// deploy env var. PoC ships at this scope; multi-user opens up only
// when production v1 lands behind its own auth model.
//
// Matches the seed in migration 023 (m's job_title row). If m's email
// ever rotates, this constant must rotate with it; there is no other
// path to enabling Paliadin.
//
// IsOwner compares a user's stored email against this value
// case-insensitively (strings.EqualFold), so letter case here is not
// significant.
const PaliadinOwnerEmail = "matthias.siebels@hoganlovells.com"
// Paliadin is the interface every Paliadin backend implements. Two
// production implementations: LocalPaliadinService (local tmux+claude)
// and RemotePaliadinService (ssh+paliadin-shim on mRiver). A
// DisabledPaliadinService stub is constructed when neither is available
// so callers don't have to nil-check on every entry point.
type Paliadin interface {
// RunTurn executes one question/answer round for req.UserID and
// returns the parsed response, or an error when the turn could not
// be completed.
RunTurn(ctx context.Context, req TurnRequest) (*TurnResult, error)
// ResetSession kills the user's tmux session entirely so the next
// RunTurn boots a fresh claude pane. Per-user since each Paliad user
// has their own session (t-paliad-155).
ResetSession(ctx context.Context, userID uuid.UUID) error
// ListRecentTurns returns up to limit audit rows visible to callerID,
// newest first in the paliadinDB implementation.
ListRecentTurns(ctx context.Context, callerID uuid.UUID, limit int) ([]PaliadinTurn, error)
// Stats returns the aggregate dashboard view over the turns visible
// to callerID.
Stats(ctx context.Context, callerID uuid.UUID) (*PaliadinStats, error)
// IsOwner reports whether userID belongs to the account allowed to
// use Paliadin (see PaliadinOwnerEmail).
IsOwner(ctx context.Context, userID uuid.UUID) (bool, error)
}
// paliadinDB is the audit-table read/write surface shared by every
// Paliadin implementation. Embedded in LocalPaliadinService and
// RemotePaliadinService so they inherit IsOwner / ListRecentTurns /
// Stats and the per-turn row writers without duplication.
type paliadinDB struct {
// db is the handle every query and audit-row write in this file
// goes through.
db *sqlx.DB
// users is injected by the constructor; no method visible in this
// file reads it.
users *UserService
}
// LocalPaliadinService runs the local tmux+claude PoC (t-paliad-146).
// Used on m's laptop; not deployed to prod (the Dokploy container has no
// `claude` CLI — see RemotePaliadinService for that path).
//
// Per-user tmux session: every Paliad user gets their own session named
// `<sessionPrefix>-<userid8>` (first 8 hex chars of the user's UUID),
// created on demand. The persona + response protocol are loaded from
// the Paliadin skill (~/.claude/skills/paliadin/SKILL.md, installed via
// scripts/install-paliadin-skill); there is no in-process system prompt.
type LocalPaliadinService struct {
paliadinDB
// sessionPrefix is the leading part of every per-user tmux session
// name (see sessionNameFor).
sessionPrefix string
// responseDir is the directory RunTurn polls for the per-turn
// response file `<turn_id>.txt`.
responseDir string
// Cached pane targets per user-session, keyed by tmux session name.
// A session entry maps to "session:window-idx"; cleared when the
// pane dies or ResetSession is called for that user.
// mu guards panes.
mu sync.Mutex
panes map[string]string
// Single in-flight turn at a time across all users. PoC scope —
// claude CLI panes share the host's terminal noise; serialising
// keeps log output unambiguous.
turnMu sync.Mutex
}
// IsOwner reports whether userID is the Paliadin PoC owner. The user's
// email is looked up in paliad.users on every call and compared
// case-insensitively with PaliadinOwnerEmail, so the gate follows the
// email rather than a cached UUID.
//
// A user_id with no matching row yields (false, nil); any other lookup
// or scan failure is returned wrapped.
func (s *paliadinDB) IsOwner(ctx context.Context, userID uuid.UUID) (bool, error) {
	var address string
	row := s.db.QueryRowxContext(ctx,
		`SELECT email FROM paliad.users WHERE id = $1`, userID)
	switch err := row.Scan(&address); {
	case err == nil:
		return strings.EqualFold(address, PaliadinOwnerEmail), nil
	case errors.Is(err, sql.ErrNoRows):
		// Unknown user: not the owner, and not an error either.
		return false, nil
	default:
		return false, fmt.Errorf("paliadin: lookup owner: %w", err)
	}
}
// NewLocalPaliadinService builds the local-tmux PoC backend.
//
// sessionPrefix is the prefix shared by all per-user tmux sessions (the
// full name is `<prefix>-<userid8>`); an empty value selects
// "paliad-paliadin". responseDir is where response files are polled; an
// empty value selects "/tmp/paliadin". The pane cache starts empty.
func NewLocalPaliadinService(db *sqlx.DB, users *UserService, sessionPrefix, responseDir string) *LocalPaliadinService {
	// Start from the defaults and override only what the caller supplied.
	svc := &LocalPaliadinService{
		paliadinDB:    paliadinDB{db: db, users: users},
		sessionPrefix: "paliad-paliadin",
		responseDir:   "/tmp/paliadin",
		panes:         map[string]string{},
	}
	if sessionPrefix != "" {
		svc.sessionPrefix = sessionPrefix
	}
	if responseDir != "" {
		svc.responseDir = responseDir
	}
	return svc
}
// sessionNameFor derives the tmux session name for userID:
// `<sessionPrefix>-<first 8 characters of the UUID string>`. One
// persistent session per user (t-paliad-155); ResetSession is the
// user-driven way to discard it.
func (s *LocalPaliadinService) sessionNameFor(userID uuid.UUID) string {
	const shortLen = 8
	id := userID.String()
	if len(id) > shortLen {
		id = id[:shortLen]
	}
	return s.sessionPrefix + "-" + id
}
// PaliadinTurn is the audit row: one record per RunTurn call in
// paliad.paliadin_turns. Pointer fields map to columns that may be NULL
// and are omitted from JSON when unset.
type PaliadinTurn struct {
// Written by insertTurnRow when the turn starts (together with
// UserMessage and PageOrigin below).
TurnID uuid.UUID `db:"turn_id" json:"turn_id"`
UserID uuid.UUID `db:"user_id" json:"user_id"`
SessionID string `db:"session_id" json:"session_id"`
StartedAt time.Time `db:"started_at" json:"started_at"`
// FinishedAt is set by completeTurn on success and by the
// markTurn* writers on failure.
FinishedAt *time.Time `db:"finished_at" json:"finished_at,omitempty"`
// DurationMS through RowsSeen, plus ChipCount and ClassifierTag, are
// filled in by completeTurn.
DurationMS *int `db:"duration_ms" json:"duration_ms,omitempty"`
UserMessage string `db:"user_message" json:"user_message"`
Response *string `db:"response" json:"response,omitempty"`
ResponseTokens *int `db:"response_tokens" json:"response_tokens,omitempty"`
UsedTools pq.StringArray `db:"used_tools" json:"used_tools"`
RowsSeen pq.Int64Array `db:"rows_seen" json:"rows_seen"`
ChipCount int `db:"chip_count" json:"chip_count"`
// Abandoned and ErrorCode are written by markTurnAbandonedOrError;
// markTurnError writes ErrorCode only.
Abandoned bool `db:"abandoned" json:"abandoned"`
PageOrigin *string `db:"page_origin" json:"page_origin,omitempty"`
ErrorCode *string `db:"error_code" json:"error_code,omitempty"`
ClassifierTag *string `db:"classifier_tag" json:"classifier_tag,omitempty"`
}
// TurnRequest is what the handler passes to RunTurn.
type TurnRequest struct {
// UserID selects the per-user tmux session and is stored on the
// audit row.
UserID uuid.UUID
// SessionID is stored on the audit row as session_id.
SessionID string
// UserMessage is the raw prompt; RunTurn sanitises it before sending
// it to the pane and stores the unsanitised text in the audit row.
UserMessage string
PageOrigin string // empty when unknown
}
// TurnResult is what RunTurn returns to the handler.
type TurnResult struct {
// TurnID is the identifier generated for this turn; it also keys the
// audit row and the response file name.
TurnID uuid.UUID
Response string // body without [paliadin-meta] trailer
// UsedTools, RowsSeen and ClassifierTag come from the parsed
// [paliadin-meta] trailer and are zero-valued when it is missing.
UsedTools []string
RowsSeen []int
// ChipCount is the number of chip markers found in Response.
ChipCount int
ClassifierTag string
// DurationMS is the elapsed time from turn start to response, in
// milliseconds.
DurationMS int
}
// Sentinel errors exposed to handlers; match them with errors.Is.
var (
	// ErrPaliadinDisabled signals that the service is wired but turned
	// off. Handlers map it to 503.
	ErrPaliadinDisabled = errors.New("paliadin: disabled")

	// ErrTmuxUnavailable signals that tmux could not be reached (binary
	// missing, session unreachable, etc.). Handlers map it to 503 with a
	// hint.
	ErrTmuxUnavailable = errors.New("paliadin: tmux unavailable")
)
// RunTurn executes one full Q&A round for req.UserID. It blocks until
// Claude has written the per-turn response file, the fixed 60 s poll
// timeout expires, or ctx is cancelled.
//
// An audit row is inserted before any tmux work and closed on every
// path: completeTurn on success, error_code "tmux_unresponsive" when the
// pane or response dir cannot be prepared or the prompt cannot be sent,
// "timeout" when no response arrives, and "user_aborted" (abandoned =
// true) when ctx was cancelled.
//
// Returns ErrTmuxUnavailable (wrapped) for pane/send failures, the poll
// error for timeouts and cancellation, and a plain wrapped error when
// the initial insert or the response-dir mkdir fails.
//
// PoC: serialised. The service's turnMu field enforces "one turn at a
// time" across all users.
func (s *LocalPaliadinService) RunTurn(ctx context.Context, req TurnRequest) (*TurnResult, error) {
	s.turnMu.Lock()
	defer s.turnMu.Unlock()

	turnID := uuid.New()
	startedAt := time.Now().UTC()

	// audit runs one audit-row write on a context detached from the
	// request. The failure paths are typically reached *because* ctx was
	// cancelled (user clicked Stop); reusing ctx would make the database
	// driver reject the very update that records the abort. Write errors
	// are logged rather than returned: bookkeeping must never mask the
	// real outcome of the turn.
	audit := func(what string, write func(context.Context) error) {
		auditCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if err := write(auditCtx); err != nil {
			log.Printf("paliadin: %s turn %s: %v", what, turnID, err)
		}
	}
	markError := func(code string) {
		audit("mark "+code, func(c context.Context) error {
			return s.markTurnError(c, turnID, code)
		})
	}

	// Audit row — written *first* so a crash mid-turn still leaves traces.
	if err := s.insertTurnRow(ctx, &PaliadinTurn{
		TurnID:      turnID,
		UserID:      req.UserID,
		SessionID:   req.SessionID,
		StartedAt:   startedAt,
		UserMessage: req.UserMessage,
		PageOrigin:  optionalString(req.PageOrigin),
	}); err != nil {
		return nil, fmt.Errorf("paliadin: insert turn row: %w", err)
	}

	// Ensure tmux session + Claude pane (per-user — keyed off UserID).
	target, err := s.ensurePane(ctx, req.UserID)
	if err != nil {
		markError("tmux_unresponsive")
		return nil, fmt.Errorf("%w: %v", ErrTmuxUnavailable, err)
	}

	// Make sure the response dir exists. A failure here is recorded under
	// the same error code as tmux failures.
	if err := os.MkdirAll(s.responseDir, 0o755); err != nil {
		markError("tmux_unresponsive")
		return nil, fmt.Errorf("paliadin: mkdir response dir: %w", err)
	}

	// Send the framed prompt. The Paliadin skill at
	// ~/.claude/skills/paliadin/SKILL.md description-matches on this
	// envelope and writes the response to the per-turn file.
	envelope := fmt.Sprintf("[PALIADIN:%s] %s", turnID, sanitiseForTmux(req.UserMessage))
	if err := s.sendToPane(ctx, target, envelope); err != nil {
		markError("tmux_unresponsive")
		return nil, fmt.Errorf("%w: send prompt: %v", ErrTmuxUnavailable, err)
	}

	// Poll for the response file. Fixed 60 s timeout; abort early if the
	// caller's context is cancelled (e.g. user clicked Stop).
	respPath := filepath.Join(s.responseDir, turnID.String()+".txt")
	body, err := s.pollForResponse(ctx, respPath, 60*time.Second)
	if err != nil {
		code, aborted := "timeout", false
		if errors.Is(err, context.Canceled) {
			code, aborted = "user_aborted", true
		}
		audit("mark "+code, func(c context.Context) error {
			return s.markTurnAbandonedOrError(c, turnID, code, aborted)
		})
		return nil, err
	}

	// Strip + parse the [paliadin-meta] trailer. Best-effort: Claude is
	// instructed to emit it, but a missing trailer simply yields zero
	// values.
	cleanBody, meta := splitTrailer(body)
	tokens := approxTokenCount(cleanBody)
	chipCount := countChips(cleanBody)
	finished := time.Now().UTC()
	durationMS := int(finished.Sub(startedAt) / time.Millisecond)

	// Write the result back into the audit row. Failures are logged by
	// audit and do not fail the user-facing request — the response is
	// real even if the bookkeeping is broken.
	audit("complete", func(c context.Context) error {
		return s.completeTurn(c, turnID, finished, durationMS, cleanBody, tokens, meta, chipCount)
	})

	return &TurnResult{
		TurnID:        turnID,
		Response:      cleanBody,
		UsedTools:     meta.UsedTools,
		RowsSeen:      meta.RowsSeen,
		ChipCount:     chipCount,
		ClassifierTag: meta.ClassifierTag,
		DurationMS:    durationMS,
	}, nil
}
// ResetSession kills the user's tmux session entirely so the next
// RunTurn boots a fresh claude pane. With skill-based persona load
// (~/.claude/skills/paliadin/SKILL.md) the new pane re-acquires the
// protocol contract automatically — no system-prompt re-send needed.
//
// The cached pane target for the user is dropped unconditionally. A
// session that does not exist is not an error. Failures to run tmux at
// all are returned wrapped in ErrTmuxUnavailable; a failing kill-session
// is returned as-is.
func (s *LocalPaliadinService) ResetSession(ctx context.Context, userID uuid.UUID) error {
	session := s.sessionNameFor(userID)

	s.mu.Lock()
	delete(s.panes, session)
	s.mu.Unlock()

	if err := runTmux(ctx, "has-session", "-t", session); err != nil {
		// tmux ran and exited with a regular non-zero status: the
		// session (or the whole tmux server) is not there, so there is
		// nothing to kill and the next RunTurn will recreate it.
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) && exitErr.ExitCode() > 0 {
			return nil
		}
		// Anything else — binary missing, process killed by the timeout,
		// context cancelled — is a genuine failure the caller must see.
		return fmt.Errorf("%w: %v", ErrTmuxUnavailable, err)
	}
	return runTmux(ctx, "kill-session", "-t", session)
}
// ListRecentTurns returns the most recent audit rows visible to
// callerID, newest first. A caller whose global_role is 'global_admin'
// sees every row; everyone else sees only their own.
//
// limit values outside 1..200 are replaced by 50. The returned slice is
// never nil on success.
func (s *paliadinDB) ListRecentTurns(ctx context.Context, callerID uuid.UUID, limit int) ([]PaliadinTurn, error) {
	const (
		fallbackLimit = 50
		maxLimit      = 200
	)
	if limit < 1 || limit > maxLimit {
		limit = fallbackLimit
	}

	const query = `
SELECT turn_id, user_id, session_id, started_at, finished_at, duration_ms,
user_message, response, response_tokens, used_tools, rows_seen,
chip_count, abandoned, page_origin, error_code, classifier_tag
FROM paliad.paliadin_turns
WHERE user_id = $1
OR EXISTS (SELECT 1 FROM paliad.users u
WHERE u.id = $1 AND u.global_role = 'global_admin')
ORDER BY started_at DESC
LIMIT $2
`
	turns := make([]PaliadinTurn, 0, limit)
	err := s.db.SelectContext(ctx, &turns, query, callerID, limit)
	if err != nil {
		return nil, fmt.Errorf("paliadin: list turns: %w", err)
	}
	return turns, nil
}
// PaliadinStats is the aggregate view shown on /admin/paliadin.
// Stats returns it with only the two turn counts populated (and the
// collections empty but non-nil) when the caller has no visible turns.
type PaliadinStats struct {
// TotalTurns counts every visible turn; TurnsLast7Days the subset
// started within the last 7 days.
TotalTurns int `json:"total_turns"`
TurnsLast7Days int `json:"turns_last_7_days"`
// Duration percentiles are computed over turns with a non-NULL
// duration_ms only.
MedianDurationMS int `json:"median_duration_ms"`
P90DurationMS int `json:"p90_duration_ms"`
ToolUseRate float64 `json:"tool_use_rate"` // 0..1
AbandonRate float64 `json:"abandon_rate"` // 0..1
ByClassifier map[string]int `json:"by_classifier"` // tag → count
DailyCounts []PaliadinDailyCount `json:"daily_counts"` // last 30 days
TopPrompts []PaliadinPromptCount `json:"top_prompts"` // most-frequent normalised prompts
}
// Row types for the grouped queries in Stats.
type (
	// PaliadinDailyCount is one bucket of the per-day turn histogram.
	PaliadinDailyCount struct {
		Day   string `db:"day" json:"day"` // YYYY-MM-DD
		Count int    `db:"count" json:"count"`
	}

	// PaliadinPromptCount is one normalised prompt together with the
	// number of turns that used it.
	PaliadinPromptCount struct {
		Prompt string `db:"prompt" json:"prompt"`
		Count  int    `db:"count" json:"count"`
	}
)
// Stats computes the dashboard aggregate over the turns visible to
// callerID. A caller whose global_role is 'global_admin' sees every
// row; everyone else sees their own slice (PoC has only m, but the
// policy matches RLS on the table).
//
// When no turns are visible the result carries zero counts and empty
// (non-nil) collections, and the remaining queries are skipped — which
// also keeps the rate divisions below away from a zero denominator.
// Every query failure is returned wrapped with the stage that failed.
func (s *paliadinDB) Stats(ctx context.Context, callerID uuid.UUID) (*PaliadinStats, error) {
	stats := &PaliadinStats{
		ByClassifier: map[string]int{},
		DailyCounts:  []PaliadinDailyCount{},
		TopPrompts:   []PaliadinPromptCount{},
	}

	// Visibility predicate: caller's own rows OR all rows if global_admin.
	// It is a fixed string (no user input), so splicing it in with
	// Sprintf is safe; callerID is always bound as $1.
	visible := `(user_id = $1 OR EXISTS (SELECT 1 FROM paliad.users u WHERE u.id = $1 AND u.global_role = 'global_admin'))`

	// Total + 7-day count.
	if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
SELECT COUNT(*),
COUNT(*) FILTER (WHERE started_at >= now() - interval '7 days')
FROM paliad.paliadin_turns
WHERE %s
`, visible), callerID).Scan(&stats.TotalTurns, &stats.TurnsLast7Days); err != nil {
		return nil, fmt.Errorf("paliadin: stats totals: %w", err)
	}
	if stats.TotalTurns == 0 {
		return stats, nil
	}

	// Duration percentiles. Skip rows still in flight (duration_ms NULL).
	if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
SELECT COALESCE(percentile_cont(0.5) WITHIN GROUP (ORDER BY duration_ms), 0)::int,
COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY duration_ms), 0)::int
FROM paliad.paliadin_turns
WHERE %s AND duration_ms IS NOT NULL
`, visible), callerID).Scan(&stats.MedianDurationMS, &stats.P90DurationMS); err != nil {
		return nil, fmt.Errorf("paliadin: stats percentiles: %w", err)
	}

	// Tool-use + abandon rates.
	var toolUsedTurns, abandonedTurns int
	if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
SELECT COUNT(*) FILTER (WHERE array_length(used_tools, 1) > 0),
COUNT(*) FILTER (WHERE abandoned = true)
FROM paliad.paliadin_turns
WHERE %s
`, visible), callerID).Scan(&toolUsedTurns, &abandonedTurns); err != nil {
		return nil, fmt.Errorf("paliadin: stats rates: %w", err)
	}
	stats.ToolUseRate = float64(toolUsedTurns) / float64(stats.TotalTurns)
	stats.AbandonRate = float64(abandonedTurns) / float64(stats.TotalTurns)

	// Histogram by classifier_tag.
	rows, err := s.db.QueryxContext(ctx, fmt.Sprintf(`
SELECT COALESCE(classifier_tag, 'untagged') AS tag, COUNT(*) AS n
FROM paliad.paliadin_turns
WHERE %s
GROUP BY tag
`, visible), callerID)
	if err != nil {
		return nil, fmt.Errorf("paliadin: stats classifier: %w", err)
	}
	defer rows.Close()
	for rows.Next() {
		var tag string
		var n int
		if err := rows.Scan(&tag, &n); err != nil {
			return nil, fmt.Errorf("paliadin: stats classifier: %w", err)
		}
		stats.ByClassifier[tag] = n
	}
	// rows.Next returns false both at end-of-set and on a mid-stream
	// failure; without this check a broken connection would silently
	// yield a truncated histogram.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("paliadin: stats classifier: %w", err)
	}

	// Daily counts (last 30 days).
	if err := s.db.SelectContext(ctx, &stats.DailyCounts, fmt.Sprintf(`
SELECT to_char(date_trunc('day', started_at), 'YYYY-MM-DD') AS day,
COUNT(*) AS count
FROM paliad.paliadin_turns
WHERE %s
AND started_at >= now() - interval '30 days'
GROUP BY day
ORDER BY day ASC
`, visible), callerID); err != nil {
		return nil, fmt.Errorf("paliadin: stats daily: %w", err)
	}

	// Top prompts (normalised: lowercase + collapse whitespace + trim).
	if err := s.db.SelectContext(ctx, &stats.TopPrompts, fmt.Sprintf(`
SELECT trim(regexp_replace(lower(user_message), '\s+', ' ', 'g')) AS prompt,
COUNT(*) AS count
FROM paliad.paliadin_turns
WHERE %s
GROUP BY prompt
ORDER BY count DESC, prompt ASC
LIMIT 10
`, visible), callerID); err != nil {
		return nil, fmt.Errorf("paliadin: stats top prompts: %w", err)
	}
	return stats, nil
}
// =============================================================================
// tmux orchestration — adapted from mVoice/server.py:250-380.
// =============================================================================
// ensurePane resolves the tmux target ("session:window-idx") of the
// Claude pane for userID, creating the session and the window when they
// are missing. Lookup order: the in-memory cache (if the cached target
// is still alive), then a window already tagged @paliadin-scope=chat,
// then a freshly created `claude` window.
//
// No system prompt is sent here; the persona and response protocol come
// from the Paliadin skill, which matches the `[PALIADIN:` envelope of
// the first user turn.
//
// s.mu is held for the whole call, including the wait for a new pane to
// become ready.
func (s *LocalPaliadinService) ensurePane(ctx context.Context, userID uuid.UUID) (string, error) {
	name := s.sessionNameFor(userID)

	s.mu.Lock()
	defer s.mu.Unlock()

	// Fast path: a cached, still-living target. A missing map entry reads
	// as "" and falls through.
	if target := s.panes[name]; target != "" && s.paneAlive(ctx, target) {
		return target, nil
	}

	// Create the session (detached) unless tmux already knows it.
	if runTmux(ctx, "has-session", "-t", name) != nil {
		if err := runTmux(ctx, "new-session", "-d", "-s", name); err != nil {
			return "", fmt.Errorf("new-session: %w", err)
		}
	}

	// Adopt a window tagged by an earlier process, if any.
	if found := s.findChatWindow(ctx, name); found != "" {
		s.panes[name] = found
		return found, nil
	}

	// Otherwise start `claude` in a new window. It must run interactively
	// (claude reads stdin), and `-P -F` makes tmux print the index of the
	// window it just created.
	idxOut, err := runTmuxOut(ctx, "new-window", "-t", name,
		"-n", "claude-paliadin",
		"-P", "-F", "#{window_index}",
		"claude")
	if err != nil {
		return "", fmt.Errorf("new-window claude: %w", err)
	}
	target := name + ":" + strings.TrimSpace(idxOut)

	// Give Claude up to 30 s to render its interactive prompt.
	if err := s.waitForPaneReady(ctx, target, 30*time.Second); err != nil {
		return "", fmt.Errorf("wait-for-ready: %w", err)
	}

	// Tag the window so a later process can rediscover it. Best-effort:
	// a failed tag only costs a rediscovery, so errors are ignored.
	for _, opt := range [][2]string{
		{"@paliadin-scope", "chat"},
		{"@fix-name", "claude-paliadin"},
	} {
		_ = runTmux(ctx, "set-window-option", "-t", target, opt[0], opt[1])
	}

	s.panes[name] = target
	return target, nil
}
// findChatWindow scans the windows of session and returns the target
// ("session:index") of the first one whose @paliadin-scope window
// option equals "chat". It returns "" when the windows cannot be listed
// or none carries the tag.
func (s *LocalPaliadinService) findChatWindow(ctx context.Context, session string) string {
	listing, err := runTmuxOut(ctx, "list-windows", "-t", session,
		"-F", "#{window_index}")
	if err != nil {
		return ""
	}
	for _, idx := range strings.Fields(listing) {
		candidate := session + ":" + idx
		scope, err := runTmuxOut(ctx, "show-window-option",
			"-t", candidate, "-v", "@paliadin-scope")
		if err != nil || strings.TrimSpace(scope) != "chat" {
			continue
		}
		return candidate
	}
	return ""
}
// paneAlive reports whether `tmux has-session -t target` succeeds for
// the given target.
func (s *LocalPaliadinService) paneAlive(ctx context.Context, target string) bool {
	return runTmux(ctx, "has-session", "-t", target) == nil
}
// waitForPaneReady polls `tmux capture-pane` on target every 500 ms
// until the captured text contains one of the readiness markers.
//
// It returns nil once a marker is seen, ctx.Err() as soon as ctx is
// done (the wait between polls is interruptible), and a descriptive
// error when timeout elapses first.
func (s *LocalPaliadinService) waitForPaneReady(ctx context.Context, target string, timeout time.Duration) error {
	const pollInterval = 500 * time.Millisecond

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if err := ctx.Err(); err != nil {
			return err
		}

		out, err := runTmuxOut(ctx, "capture-pane", "-t", target, "-p")
		// NOTE(review): the first marker literal is empty in this copy of
		// the file — most likely a prompt glyph lost in transit. As
		// written, strings.Contains(out, "") is always true, so the first
		// successful capture counts as "ready". Confirm the intended
		// glyph against the original file.
		if err == nil && (strings.Contains(out, "") || strings.Contains(out, "│")) {
			return nil
		}

		// Wait for the next poll, but wake up immediately on cancellation
		// instead of sleeping through it.
		pause := time.NewTimer(pollInterval)
		select {
		case <-ctx.Done():
			pause.Stop()
			return ctx.Err()
		case <-pause.C:
		}
	}
	return fmt.Errorf("pane %s not ready within %s", target, timeout)
}
// sendToPane types msg into the tmux target and then presses Enter.
// The first error from either tmux call is returned.
func (s *LocalPaliadinService) sendToPane(ctx context.Context, target, msg string) error {
	// Step 1: the text itself. `-l` makes tmux send it literally, so
	// special characters in the prompt are not parsed as key names.
	if err := runTmux(ctx, "send-keys", "-t", target, "-l", msg); err != nil {
		return err
	}
	// Step 2: submit. Without `-l`, "Enter" is interpreted as the key.
	return runTmux(ctx, "send-keys", "-t", target, "Enter")
}
// pollForResponse waits for the response file at path to appear with
// non-empty content, checking every 200 ms. On success it returns the
// file's content and removes the file.
//
// It returns ctx.Err() as soon as ctx is done (the wait between polls
// is interruptible) and a timeout error when nothing arrives within
// timeout. Stale files from earlier turns are not a concern because the
// caller uses a unique turn_id per request in the file name.
func (s *LocalPaliadinService) pollForResponse(ctx context.Context, path string, timeout time.Duration) (string, error) {
	const (
		pollInterval = 200 * time.Millisecond
		settleDelay  = 50 * time.Millisecond
	)

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if err := ctx.Err(); err != nil {
			return "", err
		}

		data, err := os.ReadFile(path)
		if err == nil && len(data) > 0 {
			// Brief settle delay so we don't return a partial write, then
			// read again. If the second read fails or comes back empty,
			// keep what we already have instead of handing the caller an
			// empty body with a nil error.
			time.Sleep(settleDelay)
			if settled, rerr := os.ReadFile(path); rerr == nil && len(settled) > 0 {
				data = settled
			}
			// Best-effort cleanup; a leftover file is harmless because
			// turn IDs are never reused.
			_ = os.Remove(path)
			return string(data), nil
		}

		// Wait for the next poll, waking early on cancellation.
		pause := time.NewTimer(pollInterval)
		select {
		case <-ctx.Done():
			pause.Stop()
			return "", ctx.Err()
		case <-pause.C:
		}
	}
	return "", fmt.Errorf("paliadin: response timeout after %s", timeout)
}
// =============================================================================
// shell / tmux helpers.
// =============================================================================
// runTmux executes `tmux <args...>` with a 5 s timeout layered on ctx
// and discards stdout. A failure is returned wrapped (%w) together with
// the argument list and whatever tmux wrote to stderr.
func runTmux(ctx context.Context, args ...string) error {
	bounded, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	var errOut bytes.Buffer
	proc := exec.CommandContext(bounded, "tmux", args...)
	proc.Stderr = &errOut

	err := proc.Run()
	if err == nil {
		return nil
	}
	return fmt.Errorf("tmux %s: %w (stderr: %s)", strings.Join(args, " "), err, errOut.String())
}
// runTmuxOut executes `tmux <args...>` with a 5 s timeout layered on
// ctx and returns its stdout — used for capture-pane, list-windows and
// show-window-option. On failure it returns "" and an error wrapping
// the cause, the argument list and tmux's stderr.
func runTmuxOut(ctx context.Context, args ...string) (string, error) {
	bounded, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	var (
		captured bytes.Buffer
		errOut   bytes.Buffer
	)
	proc := exec.CommandContext(bounded, "tmux", args...)
	proc.Stdout = &captured
	proc.Stderr = &errOut

	err := proc.Run()
	if err == nil {
		return captured.String(), nil
	}
	return "", fmt.Errorf("tmux %s: %w (stderr: %s)", strings.Join(args, " "), err, errOut.String())
}
// sanitiseForTmux prepares a user message for `tmux send-keys -l`.
//
// Carriage returns and newlines are each replaced by a single space:
// the text is sent literally, but a line break inside it would submit
// the prompt early and break the turn envelope. Messages longer than
// 8000 bytes are cut and suffixed with " […truncated]". The cut never
// lands inside a multi-byte UTF-8 sequence, so valid input always
// produces valid output.
func sanitiseForTmux(s string) string {
	s = strings.ReplaceAll(s, "\r", " ")
	s = strings.ReplaceAll(s, "\n", " ")

	// Cap length: a runaway prompt is a footgun.
	const maxLen = 8000
	if len(s) > maxLen {
		// Back up while the byte at the cut point is a UTF-8 continuation
		// byte (10xxxxxx); cutting there would leave a truncated rune at
		// the end of the prefix.
		cut := maxLen
		for cut > 0 && s[cut]&0xC0 == 0x80 {
			cut--
		}
		s = s[:cut] + " […truncated]"
	}
	return s
}
// =============================================================================
// trailer parsing.
// =============================================================================
// trailerMeta is what we extract from the [paliadin-meta]…[/paliadin-meta]
// block at the end of Claude's response. Best-effort: missing fields
// default to zero values.
type trailerMeta struct {
// UsedTools holds the non-empty, comma-separated entries of the
// `used_tools:` line.
UsedTools []string
// RowsSeen holds the entries of the `rows_seen:` line that parse as
// integers; anything else is dropped.
RowsSeen []int
// ClassifierTag is the trimmed value of the `classifier_tag:` line.
ClassifierTag string
}
// trailerRE matches the metadata trailer at the very end of a response:
// optional newlines, a `---` rule, then a [paliadin-meta] … [/paliadin-meta]
// block followed only by whitespace. Capture group 1 is the text between
// the two tags; (?s) lets that text span multiple lines.
var trailerRE = regexp.MustCompile(`(?s)\n*---\s*\n+\[paliadin-meta\]\s*\n(.+?)\n\[/paliadin-meta\]\s*$`)
// splitTrailer splits a raw response into its visible body and the
// parsed metadata trailer. Trailing whitespace is trimmed first. When no
// trailer matches, the trimmed input is returned as the body together
// with a zero trailerMeta.
func splitTrailer(body string) (string, trailerMeta) {
	const trimSet = " \t\n\r"

	trimmed := strings.TrimRight(body, trimSet)
	loc := trailerRE.FindStringSubmatchIndex(trimmed)
	if loc == nil {
		return trimmed, trailerMeta{}
	}

	// loc[0] is where the whole trailer starts; loc[2]:loc[3] delimits
	// capture group 1, the text between the tags.
	matchStart, metaStart, metaEnd := loc[0], loc[2], loc[3]
	visible := strings.TrimRight(trimmed[:matchStart], trimSet)
	return visible, parseTrailer(trimmed[metaStart:metaEnd])
}
// parseTrailer reads the `key: value` lines of a trailer body. Keys are
// matched case-insensitively; lines without a colon and unknown keys
// are skipped.
//
//   - used_tools: comma-separated names, empty entries dropped
//   - rows_seen: comma-separated integers, non-numeric entries dropped
//   - classifier_tag: the trimmed value as-is (the last occurrence wins)
//
// Repeated used_tools / rows_seen lines accumulate.
func parseTrailer(text string) trailerMeta {
	var meta trailerMeta
	for _, raw := range strings.Split(text, "\n") {
		key, value, found := splitFirst(strings.TrimSpace(raw), ":")
		if !found {
			continue
		}
		key = strings.ToLower(strings.TrimSpace(key))
		value = strings.TrimSpace(value)

		if key == "classifier_tag" {
			meta.ClassifierTag = value
			continue
		}
		if key != "used_tools" && key != "rows_seen" {
			continue
		}

		// Both remaining keys carry a comma-separated list.
		for _, item := range strings.Split(value, ",") {
			item = strings.TrimSpace(item)
			if key == "used_tools" {
				if item != "" {
					meta.UsedTools = append(meta.UsedTools, item)
				}
				continue
			}
			if n, err := strconv.Atoi(item); err == nil {
				meta.RowsSeen = append(meta.RowsSeen, n)
			}
		}
	}
	return meta
}
// splitFirst cuts s around the first occurrence of sep and returns the
// text before it, the text after it, and true. When sep does not occur
// in s it returns "", "", false.
func splitFirst(s, sep string) (string, string, bool) {
	at := strings.Index(s, sep)
	if at >= 0 {
		head, tail := s[:at], s[at+len(sep):]
		return head, tail, true
	}
	return "", "", false
}
// approxTokenCount estimates a token count as the number of
// whitespace-separated words × 1.3, truncated to an int. Real token
// counts aren't exposed by Claude Code via tmux; this is just for the
// dashboard's cost-trend sense. Empty or all-whitespace input yields 0.
func approxTokenCount(s string) int {
	const tokensPerWord = 1.3
	wordCount := len(strings.Fields(s))
	return int(float64(wordCount) * tokensPerWord)
}
// chipRE recognises the chip markers Claude is asked to embed in a
// response: `[#<word>-OPEN:<id>]` (e.g. `[#deadline-OPEN:…]`,
// `[#projekt-OPEN:…]`) and `[chip:<word>:<payload>]`.
var chipRE = regexp.MustCompile(`\[(?:#[a-z]+-OPEN:[A-Za-z0-9\-_]+|chip:[a-z]+:[^\]]+)\]`)

// countChips returns how many chip markers occur in s. The PoC frontend
// renders them as buttons; the audit log only needs the count.
func countChips(s string) int {
	hits := chipRE.FindAllStringIndex(s, -1)
	return len(hits)
}
// =============================================================================
// audit-row writers.
// =============================================================================
// insertTurnRow creates the audit row for a turn that is about to
// start. Only the columns known at that point are written: turn_id,
// user_id, session_id, started_at, user_message and page_origin.
func (s *paliadinDB) insertTurnRow(ctx context.Context, t *PaliadinTurn) error {
	const stmt = `
INSERT INTO paliad.paliadin_turns (
turn_id, user_id, session_id, started_at, user_message, page_origin
) VALUES ($1, $2, $3, $4, $5, $6)
`
	params := []interface{}{
		t.TurnID, t.UserID, t.SessionID, t.StartedAt, t.UserMessage, t.PageOrigin,
	}
	_, err := s.db.ExecContext(ctx, stmt, params...)
	return err
}
// completeTurn records a successful turn on its audit row: finish time,
// duration, response text, approximate token count, the trailer
// metadata and the chip count. An empty classifier tag is passed as a
// nil pointer rather than as "".
func (s *paliadinDB) completeTurn(ctx context.Context, turnID uuid.UUID,
	finishedAt time.Time, durationMS int, response string, tokens int,
	meta trailerMeta, chipCount int) error {
	const stmt = `
UPDATE paliad.paliadin_turns
SET finished_at = $2,
duration_ms = $3,
response = $4,
response_tokens = $5,
used_tools = $6,
rows_seen = $7,
chip_count = $8,
classifier_tag = $9
WHERE turn_id = $1
`
	// Widen []int to the int64 array type; the result is always non-nil,
	// even when the trailer listed no rows.
	seen := make(pq.Int64Array, len(meta.RowsSeen))
	for i, n := range meta.RowsSeen {
		seen[i] = int64(n)
	}
	tools := pq.StringArray(meta.UsedTools)
	tag := optionalString(meta.ClassifierTag)

	_, err := s.db.ExecContext(ctx, stmt,
		turnID, finishedAt, durationMS, response, tokens,
		tools, seen, chipCount, tag)
	return err
}
// markTurnError closes a turn's audit row with the given error code and
// the current UTC time. The `finished_at IS NULL` guard makes it a no-op
// for rows that were already closed.
func (s *paliadinDB) markTurnError(ctx context.Context, turnID uuid.UUID, code string) error {
	const stmt = `
UPDATE paliad.paliadin_turns
SET finished_at = $2, error_code = $3
WHERE turn_id = $1 AND finished_at IS NULL
`
	now := time.Now().UTC()
	_, err := s.db.ExecContext(ctx, stmt, turnID, now, code)
	return err
}
// markTurnAbandonedOrError closes a turn's audit row with the given
// error code, the abandoned flag and the current UTC time. The
// `finished_at IS NULL` guard makes it a no-op for rows that were
// already closed.
func (s *paliadinDB) markTurnAbandonedOrError(ctx context.Context, turnID uuid.UUID, code string, abandoned bool) error {
	const stmt = `
UPDATE paliad.paliadin_turns
SET finished_at = $2, error_code = $3, abandoned = $4
WHERE turn_id = $1 AND finished_at IS NULL
`
	now := time.Now().UTC()
	_, err := s.db.ExecContext(ctx, stmt, turnID, now, code, abandoned)
	return err
}
// optionalString maps "" to nil and any other string to a pointer to a
// copy of it.
func optionalString(s string) *string {
	if s != "" {
		return &s
	}
	return nil
}
// Compile-time type guards (catches sql.ErrNoRows shifts).
// The blank assignment fails to compile if sql.ErrNoRows ever stops
// being assignable to error; it has no runtime effect beyond the
// package-level initialisation.
var _ error = sql.ErrNoRows