When a user's tmux session dies (mRiver reboot, OOM, manual kill,
container restart) the next turn used to wake claude with NO prior
context — the persona had to derive everything from the new turn
alone. Now: when the Go side detects a fresh pane, it pulls the last
N exchanges from paliad.paliadin_turns and prepends them as a
[primer …][/primer] block to the next user envelope.
Format SKILL.md parses (single-line, control-chars stripped):
[PALIADIN:<turn_id>] [primer last=N] U: … \n A: … \n … [/primer] [ctx …] <Frage>
Detection paths:
- Local (LocalPaliadinService): ensurePane now returns
(target, isFresh, err). isFresh is true when no prior
@paliadin-scope=chat window existed and we created one. RunTurn
passes that into buildPrimerIfFresh.
- Remote (RemotePaliadinService): can't see across the SSH boundary
to know the pane's true freshness, so we approximate with a
per-(session, Go-process) "primed" cache. First turn after
process-start, ResetSession, or healthGate failure rebuilds the
primer; subsequent turns skip it. ResetSession + healthGate failure
both call clearPrimed(session) explicitly.
paliadinDB.buildPrimerIfFresh assembles the block:
- Reads the last MaxPrimerTurns=5 exchanges from
ListHistoryForSession (Slice F).
- truncateForPrimer normalises each side (drops \r\n, collapses
whitespace, caps at MaxPrimerCharsPerSide=600 with …).
- Returns "" silently when isFresh=false, no SessionID, no prior
history, or DB error — the user's actual question still lands; we
only lose the recap.
SKILL.md (~/.claude/skills/paliadin/SKILL.md, refreshed via
scripts/install-paliadin-skill) gets a new "Crash-recovery primer"
section above the context-envelope block. Five behaviour rules:
1. Don't re-execute prior tool calls (audit log already has them).
2. Use the primer for thread continuity, not as a data source.
Re-call tools for fresh facts.
3. Truncated lines (ending in …) are partial — paraphrase rather
than quote.
4. No primer at all = normal case (existing pane, history is in
tmux memory). Behave as before.
5. Acknowledge sparingly — usually just answer the actual question
with the recap as silent context.
New test TestTruncateForPrimer pins the per-side truncation contract
(no \r\n leaks, repeated spaces collapsed, ellipsis on oversized
input, short input untouched). go test green.
Refs: docs/design-paliadin-inline-2026-05-08.md §6
(deferred Anthropic API cutover prereq).
1281 lines
47 KiB
Go
1281 lines
47 KiB
Go
package services
|
||
|
||
// Paliadin — the in-app AI buddy. Two implementations of the same
|
||
// interface, picked at boot time (see cmd/server/main.go):
|
||
//
|
||
// - LocalPaliadinService — talks to a `claude` CLI in a local tmux
|
||
// session. The PoC path (t-paliad-146); used on m's laptop.
|
||
// - RemotePaliadinService — shells out to ssh on mRiver where the
|
||
// long-lived tmux+claude pane lives. The prod path (t-paliad-151);
|
||
// used by the paliad.de Dokploy container, which has no `claude`
|
||
// CLI of its own.
|
||
//
|
||
// Designs:
|
||
// - docs/design-paliadin-2026-05-07.md (PoC architecture)
|
||
// - docs/design-paliadin-tailscale-ssh-2026-05-07.md (remote routing)
|
||
//
|
||
// Both implementations share the audit-table I/O (paliadinDB) and the
|
||
// trailer parser. The conversation state (turn ordering, response file
|
||
// polling) is split: Local owns the tmux pane directly; Remote delegates
|
||
// to the paliadin-shim on mRiver and reads the file there.
|
||
|
||
import (
|
||
"bytes"
|
||
"context"
|
||
"database/sql"
|
||
"encoding/json"
|
||
"errors"
|
||
"fmt"
|
||
"log"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/google/uuid"
|
||
"github.com/jmoiron/sqlx"
|
||
"github.com/lib/pq"
|
||
)
|
||
|
||
// PaliadinOwnerEmail names the single account permitted to use the
// Paliadin PoC. It is a compile-time constant on purpose: the gate must
// not be switchable through a deploy env var. The PoC ships at this
// scope; multi-user access only opens up once production v1 lands behind
// its own auth model.
//
// The value matches the seed in migration 023 (m's job_title row). Should
// m's email ever rotate, this constant has to rotate with it — there is
// no other way to enable Paliadin.
const PaliadinOwnerEmail = "matthias.siebels@hoganlovells.com"
|
||
|
||
// Paliadin is the interface every Paliadin backend implements. Two
|
||
// production implementations: LocalPaliadinService (local tmux+claude)
|
||
// and RemotePaliadinService (ssh+paliadin-shim on mRiver). A
|
||
// DisabledPaliadinService stub is constructed when neither is available
|
||
// so callers don't have to nil-check on every entry point.
|
||
type Paliadin interface {
|
||
RunTurn(ctx context.Context, req TurnRequest) (*TurnResult, error)
|
||
// ResetSession kills the user's tmux session entirely so the next
|
||
// RunTurn boots a fresh claude pane. Per-user since each Paliad user
|
||
// has their own session (t-paliad-155).
|
||
ResetSession(ctx context.Context, userID uuid.UUID) error
|
||
ListRecentTurns(ctx context.Context, callerID uuid.UUID, limit int) ([]PaliadinTurn, error)
|
||
// GetTurn returns a single turn the caller is allowed to see.
|
||
// global_admin can see anyone's turn; everyone else only their own.
|
||
// Returns sql.ErrNoRows when the row is invisible or absent.
|
||
GetTurn(ctx context.Context, callerID uuid.UUID, turnID uuid.UUID) (*PaliadinTurn, error)
|
||
// ListHistoryForSession returns the caller's turns for a given browser
|
||
// session in chronological order (oldest → newest). Powers the
|
||
// crash-resistant chat history hydrate (t-paliad-161 follow-up): the
|
||
// inline drawer and the standalone /paliadin page share one session
|
||
// id, so a turn typed in the drawer surfaces on the standalone page
|
||
// (and vice versa) on next mount. DB is source of truth; localStorage
|
||
// is render-cache only.
|
||
ListHistoryForSession(ctx context.Context, callerID uuid.UUID, sessionID string, limit int) ([]PaliadinTurn, error)
|
||
Stats(ctx context.Context, callerID uuid.UUID) (*PaliadinStats, error)
|
||
IsOwner(ctx context.Context, userID uuid.UUID) (bool, error)
|
||
}
|
||
|
||
// paliadinDB is the audit-table read/write surface shared by every
|
||
// Paliadin implementation. Embedded in LocalPaliadinService and
|
||
// RemotePaliadinService so they inherit IsOwner / ListRecentTurns /
|
||
// Stats and the per-turn row writers without duplication.
|
||
type paliadinDB struct {
|
||
db *sqlx.DB
|
||
users *UserService
|
||
}
|
||
|
||
// LocalPaliadinService runs the local tmux+claude PoC (t-paliad-146).
|
||
// Used on m's laptop; not deployed to prod (the Dokploy container has no
|
||
// `claude` CLI — see RemotePaliadinService for that path).
|
||
//
|
||
// Per-user tmux session: every Paliad user gets their own session named
|
||
// `<sessionPrefix>-<userid8>` (first 8 hex chars of the user's UUID),
|
||
// created on demand. The persona + response protocol are loaded from
|
||
// the Paliadin skill (~/.claude/skills/paliadin/SKILL.md, installed via
|
||
// scripts/install-paliadin-skill); there is no in-process system prompt.
|
||
type LocalPaliadinService struct {
|
||
paliadinDB
|
||
sessionPrefix string
|
||
responseDir string
|
||
|
||
// Cached pane targets per user-session, keyed by tmux session name.
|
||
// A session entry maps to "session:window-idx"; cleared when the
|
||
// pane dies or ResetSession is called for that user.
|
||
mu sync.Mutex
|
||
panes map[string]string
|
||
|
||
// Single in-flight turn at a time across all users. PoC scope —
|
||
// claude CLI panes share the host's terminal noise; serialising
|
||
// keeps log output unambiguous.
|
||
turnMu sync.Mutex
|
||
|
||
// Janitor goroutine: watches responseDir for files written after the
|
||
// 60 s pollForResponse window expired and patches the corresponding
|
||
// paliadin_turns row so the chat UI can render the late response.
|
||
// Started from cmd/server once at boot; idempotent.
|
||
janitorOnce sync.Once
|
||
}
|
||
|
||
// IsOwner returns true when the given user_id corresponds to m's
|
||
// account (the only Paliadin PoC user). Resolves via paliad.users.email
|
||
// rather than caching a UUID so a DB rebuild that reassigns auth UUIDs
|
||
// doesn't strand the gate.
|
||
//
|
||
// Returns (false, nil) for any other user — including unknown UUIDs and
|
||
// users without an email row. Errors only on DB failure.
|
||
func (s *paliadinDB) IsOwner(ctx context.Context, userID uuid.UUID) (bool, error) {
|
||
var email string
|
||
err := s.db.QueryRowxContext(ctx,
|
||
`SELECT email FROM paliad.users WHERE id = $1`, userID).Scan(&email)
|
||
if errors.Is(err, sql.ErrNoRows) {
|
||
return false, nil
|
||
}
|
||
if err != nil {
|
||
return false, fmt.Errorf("paliadin: lookup owner: %w", err)
|
||
}
|
||
return strings.EqualFold(email, PaliadinOwnerEmail), nil
|
||
}
|
||
|
||
// NewLocalPaliadinService wires the local-tmux PoC backend. The
|
||
// sessionPrefix arg is the prefix every per-user tmux session inherits —
|
||
// the actual session name is `<prefix>-<userid8>`. Falls back to
|
||
// defaults when env vars are empty.
|
||
func NewLocalPaliadinService(db *sqlx.DB, users *UserService, sessionPrefix, responseDir string) *LocalPaliadinService {
|
||
if sessionPrefix == "" {
|
||
sessionPrefix = "paliad-paliadin"
|
||
}
|
||
if responseDir == "" {
|
||
responseDir = "/tmp/paliadin"
|
||
}
|
||
return &LocalPaliadinService{
|
||
paliadinDB: paliadinDB{db: db, users: users},
|
||
sessionPrefix: sessionPrefix,
|
||
responseDir: responseDir,
|
||
panes: make(map[string]string),
|
||
}
|
||
}
|
||
|
||
// sessionNameFor returns the tmux session name for a given user. Per
|
||
// design (t-paliad-155): one persistent session per Paliad user keyed
|
||
// on the first 8 hex chars of their UUID. Conversation history piles
|
||
// up across visits; `ResetSession` is the user-driven escape hatch.
|
||
func (s *LocalPaliadinService) sessionNameFor(userID uuid.UUID) string {
|
||
short := userID.String()
|
||
if len(short) >= 8 {
|
||
short = short[:8]
|
||
}
|
||
return s.sessionPrefix + "-" + short
|
||
}
|
||
|
||
// PaliadinTurn is the audit row.
|
||
type PaliadinTurn struct {
|
||
TurnID uuid.UUID `db:"turn_id" json:"turn_id"`
|
||
UserID uuid.UUID `db:"user_id" json:"user_id"`
|
||
SessionID string `db:"session_id" json:"session_id"`
|
||
StartedAt time.Time `db:"started_at" json:"started_at"`
|
||
FinishedAt *time.Time `db:"finished_at" json:"finished_at,omitempty"`
|
||
DurationMS *int `db:"duration_ms" json:"duration_ms,omitempty"`
|
||
UserMessage string `db:"user_message" json:"user_message"`
|
||
Response *string `db:"response" json:"response,omitempty"`
|
||
ResponseTokens *int `db:"response_tokens" json:"response_tokens,omitempty"`
|
||
UsedTools pq.StringArray `db:"used_tools" json:"used_tools"`
|
||
RowsSeen pq.Int64Array `db:"rows_seen" json:"rows_seen"`
|
||
ChipCount int `db:"chip_count" json:"chip_count"`
|
||
Abandoned bool `db:"abandoned" json:"abandoned"`
|
||
PageOrigin *string `db:"page_origin" json:"page_origin,omitempty"`
|
||
ErrorCode *string `db:"error_code" json:"error_code,omitempty"`
|
||
ClassifierTag *string `db:"classifier_tag" json:"classifier_tag,omitempty"`
|
||
// Joined user fields, populated by the admin-monitor query only
|
||
// (ListRecentTurns). Empty in the user-facing /api/paliadin/* paths.
|
||
UserEmail *string `db:"user_email" json:"user_email,omitempty"`
|
||
UserDisplayName *string `db:"user_display_name" json:"user_display_name,omitempty"`
|
||
}
|
||
|
||
// TurnRequest is what the handler passes to RunTurn.
|
||
//
|
||
// Context (t-paliad-161) is the structured page-context payload the inline
|
||
// widget submits. The standalone /paliadin page leaves it nil; the widget
|
||
// fills it from frontend/src/client/paliadin-context.ts. Stored verbatim
|
||
// in paliadin_turns.context jsonb (see migration 070); a flattened
|
||
// `[ctx …]` block is also prepended to the user envelope so SKILL.md can
|
||
// branch on it without parsing JSON inside tmux.
|
||
type TurnRequest struct {
|
||
UserID uuid.UUID
|
||
SessionID string
|
||
UserMessage string
|
||
PageOrigin string // empty when unknown
|
||
Context *TurnContext
|
||
}
|
||
|
||
// TurnContext is the structured page-context payload sent by the inline
// widget. See docs/design-paliadin-inline-2026-05-08.md §4.1.
//
// Only RouteName and PageOrigin are expected on every payload; all other
// fields are optional. A payload with just those two set is the natural
// shape for pages without a primary entity (dashboard, agenda, tools/*).
type TurnContext struct {
	RouteName         string `json:"route_name"`
	PageOrigin        string `json:"page_origin,omitempty"`
	PrimaryEntityType string `json:"primary_entity_type,omitempty"`
	PrimaryEntityID   string `json:"primary_entity_id,omitempty"`
	UserSelectionText string `json:"user_selection_text,omitempty"`
	ViewMode          string `json:"view_mode,omitempty"`
	FilterSummary     string `json:"filter_summary,omitempty"`
}
|
||
|
||
// MaxSelectionChars is the upper bound applied to user_selection_text
// before it reaches the model. The widget client enforces the same limit
// so that the truncation hint is visible in the UI and not only applied
// server-side. The design names 1000 chars as its privacy floor (§4.3).
const MaxSelectionChars = 1000
|
||
|
||
// EnvelopePrefix builds the `[ctx …]` block prepended to the user
|
||
// message in the tmux envelope. SKILL.md teaches Paliadin to read this
|
||
// prefix as authoritative context, not as instructions.
|
||
//
|
||
// Format: `[ctx route=<route> entity=<type>:<id> selection="<truncated>"
|
||
// view=<mode> filter="<summary>"]`. Fields are space-separated,
|
||
// quoted only when they may contain spaces. Empty fields are omitted.
|
||
//
|
||
// Returns "" when the context contributes nothing (RouteName empty AND
|
||
// no other fields set), so the envelope stays clean for legacy callers.
|
||
func (c *TurnContext) EnvelopePrefix() string {
|
||
if c == nil {
|
||
return ""
|
||
}
|
||
var parts []string
|
||
if c.RouteName != "" {
|
||
parts = append(parts, "route="+c.RouteName)
|
||
}
|
||
if c.PrimaryEntityType != "" && c.PrimaryEntityID != "" {
|
||
parts = append(parts, "entity="+c.PrimaryEntityType+":"+c.PrimaryEntityID)
|
||
}
|
||
if c.ViewMode != "" {
|
||
parts = append(parts, "view="+c.ViewMode)
|
||
}
|
||
if c.FilterSummary != "" {
|
||
parts = append(parts, "filter="+quoteEnvelopeValue(c.FilterSummary))
|
||
}
|
||
if c.UserSelectionText != "" {
|
||
// Always quote selection — it's user-supplied content (a quote
|
||
// from a notes field, a sentence from a deadline title), not a
|
||
// metadata token. Quoting unconditionally keeps the SKILL.md
|
||
// parser from misinterpreting whitespace boundaries on
|
||
// single-word selections.
|
||
sel := c.UserSelectionText
|
||
if len(sel) > MaxSelectionChars {
|
||
sel = sel[:MaxSelectionChars] + "…"
|
||
}
|
||
parts = append(parts, "selection="+forceQuoteEnvelopeValue(sel))
|
||
}
|
||
if len(parts) == 0 {
|
||
return ""
|
||
}
|
||
return "[ctx " + strings.Join(parts, " ") + "] "
|
||
}
|
||
|
||
// quoteEnvelopeValue wraps a value in double quotes if it contains a
|
||
// space, escaping any quotes in the value with `\"`. Cheap shell-like
|
||
// quoting — SKILL.md's parser is forgiving.
|
||
func quoteEnvelopeValue(s string) string {
|
||
if !strings.ContainsAny(s, " \t\"") {
|
||
return s
|
||
}
|
||
return forceQuoteEnvelopeValue(s)
|
||
}
|
||
|
||
// forceQuoteEnvelopeValue wraps s in double quotes unconditionally and
// escapes every inner double quote as `\"`. It is meant for fields whose
// value is user-supplied content (selection text): the SKILL.md parser
// must always be able to tell where the value ends, whether or not the
// value happens to contain whitespace.
func forceQuoteEnvelopeValue(s string) string {
	var quoted strings.Builder
	quoted.Grow(len(s) + 2)
	quoted.WriteByte('"')
	quoted.WriteString(strings.ReplaceAll(s, `"`, `\"`))
	quoted.WriteByte('"')
	return quoted.String()
}
|
||
|
||
// TurnResult is what RunTurn returns to the handler.
|
||
type TurnResult struct {
|
||
TurnID uuid.UUID
|
||
Response string // body without [paliadin-meta] trailer
|
||
UsedTools []string
|
||
RowsSeen []int
|
||
ChipCount int
|
||
ClassifierTag string
|
||
DurationMS int
|
||
}
|
||
|
||
var (
	// ErrPaliadinDisabled is the canonical signal for "the service is
	// wired but switched off". Handlers map it to 503.
	ErrPaliadinDisabled = errors.New("paliadin: disabled")

	// ErrTmuxUnavailable means tmux could not be reached (binary missing,
	// session unreachable, etc.). Handlers map it to 503 with a hint.
	ErrTmuxUnavailable = errors.New("paliadin: tmux unavailable")
)
|
||
|
||
// RunTurn executes one full Q&A round. Blocks until Claude has written
|
||
// the response file or we time out (default 60 s). Writes the audit row
|
||
// in both success + error paths.
|
||
//
|
||
// PoC: serialised. The package-level turnMu enforces "one at a time".
|
||
// m is the only user, so this is fine.
|
||
func (s *LocalPaliadinService) RunTurn(ctx context.Context, req TurnRequest) (*TurnResult, error) {
|
||
s.turnMu.Lock()
|
||
defer s.turnMu.Unlock()
|
||
|
||
turnID := uuid.New()
|
||
startedAt := time.Now().UTC()
|
||
|
||
// Audit row — written *first* so a crash mid-turn still leaves traces.
|
||
if err := s.insertTurnRow(ctx, &PaliadinTurn{
|
||
TurnID: turnID,
|
||
UserID: req.UserID,
|
||
SessionID: req.SessionID,
|
||
StartedAt: startedAt,
|
||
UserMessage: req.UserMessage,
|
||
PageOrigin: optionalString(req.PageOrigin),
|
||
}, req.Context); err != nil {
|
||
return nil, fmt.Errorf("paliadin: insert turn row: %w", err)
|
||
}
|
||
|
||
// Ensure tmux session + Claude pane (per-user — keyed off UserID).
|
||
// isFresh signals that we just created the pane (no prior chat
|
||
// window existed) — when true AND we have prior turns for this user
|
||
// session, we splice a primer into the envelope so Claude wakes
|
||
// with conversation context instead of cold.
|
||
target, isFresh, err := s.ensurePane(ctx, req.UserID)
|
||
if err != nil {
|
||
_ = s.markTurnError(ctx, turnID, "tmux_unresponsive")
|
||
return nil, fmt.Errorf("%w: %v", ErrTmuxUnavailable, err)
|
||
}
|
||
|
||
// Make sure the response dir exists.
|
||
if err := os.MkdirAll(s.responseDir, 0o755); err != nil {
|
||
_ = s.markTurnError(ctx, turnID, "tmux_unresponsive")
|
||
return nil, fmt.Errorf("paliadin: mkdir response dir: %w", err)
|
||
}
|
||
|
||
// Send the framed prompt. The Paliadin skill at
|
||
// ~/.claude/skills/paliadin/SKILL.md description-matches on this
|
||
// envelope and writes the response to the per-turn file. The optional
|
||
// [ctx …] prefix carries structured page context from the inline
|
||
// widget (t-paliad-161); SKILL.md branches on it before answering.
|
||
primer := s.buildPrimerIfFresh(ctx, isFresh, req)
|
||
envelope := fmt.Sprintf("[PALIADIN:%s] %s%s%s",
|
||
turnID, primer, req.Context.EnvelopePrefix(), sanitiseForTmux(req.UserMessage))
|
||
if err := s.sendToPane(ctx, target, envelope); err != nil {
|
||
_ = s.markTurnError(ctx, turnID, "tmux_unresponsive")
|
||
return nil, fmt.Errorf("%w: send prompt: %v", ErrTmuxUnavailable, err)
|
||
}
|
||
|
||
// Poll for the response file. Fixed 60 s timeout; abort early if the
|
||
// caller's context is cancelled (e.g. user clicked Stop).
|
||
respPath := filepath.Join(s.responseDir, turnID.String()+".txt")
|
||
body, err := s.pollForResponse(ctx, respPath, 60*time.Second)
|
||
if err != nil {
|
||
ec := "timeout"
|
||
if errors.Is(err, context.Canceled) {
|
||
ec = "user_aborted"
|
||
}
|
||
_ = s.markTurnAbandonedOrError(ctx, turnID, ec, ec == "user_aborted")
|
||
return nil, err
|
||
}
|
||
|
||
// Strip + parse the [paliadin-meta] trailer. Best-effort: the prompt
|
||
// instructs Claude to emit it but the PoC's monitoring is precisely
|
||
// what tells us how reliable that is in practice.
|
||
cleanBody, meta := splitTrailer(body)
|
||
tokens := approxTokenCount(cleanBody)
|
||
chipCount := countChips(cleanBody)
|
||
finished := time.Now().UTC()
|
||
durationMS := int(finished.Sub(startedAt) / time.Millisecond)
|
||
|
||
// Write the result back into the audit row.
|
||
if err := s.completeTurn(ctx, turnID, finished, durationMS, cleanBody, tokens, meta, chipCount); err != nil {
|
||
log.Printf("paliadin: complete turn %s: %v", turnID, err)
|
||
// Don't fail the user-facing request on audit-row write errors —
|
||
// the response is real even if the bookkeeping is broken.
|
||
}
|
||
|
||
return &TurnResult{
|
||
TurnID: turnID,
|
||
Response: cleanBody,
|
||
UsedTools: meta.UsedTools,
|
||
RowsSeen: meta.RowsSeen,
|
||
ChipCount: chipCount,
|
||
ClassifierTag: meta.ClassifierTag,
|
||
DurationMS: durationMS,
|
||
}, nil
|
||
}
|
||
|
||
// ResetSession kills the user's tmux session entirely so the next
|
||
// RunTurn boots a fresh claude pane. With skill-based persona load
|
||
// (~/.claude/skills/paliadin/SKILL.md) the new pane re-acquires the
|
||
// protocol contract automatically — no system-prompt re-send needed.
|
||
func (s *LocalPaliadinService) ResetSession(ctx context.Context, userID uuid.UUID) error {
|
||
session := s.sessionNameFor(userID)
|
||
|
||
s.mu.Lock()
|
||
delete(s.panes, session)
|
||
s.mu.Unlock()
|
||
|
||
// `tmux kill-session` returns non-zero if the session doesn't exist;
|
||
// that's fine — the next RunTurn will recreate it. Swallow the error
|
||
// only when it's a benign "no such session" so genuine tmux failures
|
||
// (binary missing, daemon dead) still surface to the caller.
|
||
if err := runTmux(ctx, "has-session", "-t", session); err != nil {
|
||
return nil
|
||
}
|
||
return runTmux(ctx, "kill-session", "-t", session)
|
||
}
|
||
|
||
// ListRecentTurns reads the last N turns visible to the caller.
|
||
// global_admin sees everything; everyone else sees their own.
|
||
func (s *paliadinDB) ListRecentTurns(ctx context.Context, callerID uuid.UUID, limit int) ([]PaliadinTurn, error) {
|
||
if limit <= 0 || limit > 200 {
|
||
limit = 50
|
||
}
|
||
out := make([]PaliadinTurn, 0, limit)
|
||
q := `
|
||
SELECT t.turn_id, t.user_id, t.session_id, t.started_at, t.finished_at, t.duration_ms,
|
||
t.user_message, t.response, t.response_tokens, t.used_tools, t.rows_seen,
|
||
t.chip_count, t.abandoned, t.page_origin, t.error_code, t.classifier_tag,
|
||
u.email AS user_email, u.display_name AS user_display_name
|
||
FROM paliad.paliadin_turns t
|
||
LEFT JOIN paliad.users u ON u.id = t.user_id
|
||
WHERE t.user_id = $1
|
||
OR EXISTS (SELECT 1 FROM paliad.users gu
|
||
WHERE gu.id = $1 AND gu.global_role = 'global_admin')
|
||
ORDER BY t.started_at DESC
|
||
LIMIT $2
|
||
`
|
||
if err := s.db.SelectContext(ctx, &out, q, callerID, limit); err != nil {
|
||
return nil, fmt.Errorf("paliadin: list turns: %w", err)
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
// GetTurn returns a single turn the caller is allowed to see. Used by
|
||
// the chat UI's late-response polling (t-paliad-late): when a turn
|
||
// returned an error event over SSE, the FE polls this endpoint to
|
||
// discover whether the janitor patched the row with a late response.
|
||
//
|
||
// Visibility mirrors ListRecentTurns: own rows always, plus everything
|
||
// for global_admin. Returns sql.ErrNoRows when invisible/absent so the
|
||
// HTTP handler can map to 404 cleanly.
|
||
func (s *paliadinDB) GetTurn(ctx context.Context, callerID, turnID uuid.UUID) (*PaliadinTurn, error) {
|
||
var out PaliadinTurn
|
||
q := `
|
||
SELECT t.turn_id, t.user_id, t.session_id, t.started_at, t.finished_at, t.duration_ms,
|
||
t.user_message, t.response, t.response_tokens, t.used_tools, t.rows_seen,
|
||
t.chip_count, t.abandoned, t.page_origin, t.error_code, t.classifier_tag
|
||
FROM paliad.paliadin_turns t
|
||
WHERE t.turn_id = $1
|
||
AND (t.user_id = $2
|
||
OR EXISTS (SELECT 1 FROM paliad.users gu
|
||
WHERE gu.id = $2 AND gu.global_role = 'global_admin'))
|
||
`
|
||
if err := s.db.GetContext(ctx, &out, q, turnID, callerID); err != nil {
|
||
return nil, err
|
||
}
|
||
return &out, nil
|
||
}
|
||
|
||
// ListHistoryForSession returns the caller's turns for a given browser
|
||
// session id, oldest → newest. Both the inline drawer and the
|
||
// standalone /paliadin page hydrate from this on mount before
|
||
// consulting localStorage, so a crash / device swap / cross-surface
|
||
// jump still shows the same conversation. Limit defaults to 50.
|
||
//
|
||
// Visibility mirrors ListRecentTurns / GetTurn (own rows always; all
|
||
// rows for global_admin). Empty session_id returns no rows.
|
||
func (s *paliadinDB) ListHistoryForSession(ctx context.Context, callerID uuid.UUID, sessionID string, limit int) ([]PaliadinTurn, error) {
|
||
if strings.TrimSpace(sessionID) == "" {
|
||
return []PaliadinTurn{}, nil
|
||
}
|
||
if limit <= 0 || limit > 200 {
|
||
limit = 50
|
||
}
|
||
out := make([]PaliadinTurn, 0, limit)
|
||
q := `
|
||
SELECT t.turn_id, t.user_id, t.session_id, t.started_at, t.finished_at, t.duration_ms,
|
||
t.user_message, t.response, t.response_tokens, t.used_tools, t.rows_seen,
|
||
t.chip_count, t.abandoned, t.page_origin, t.error_code, t.classifier_tag
|
||
FROM paliad.paliadin_turns t
|
||
WHERE t.session_id = $1
|
||
AND (t.user_id = $2
|
||
OR EXISTS (SELECT 1 FROM paliad.users gu
|
||
WHERE gu.id = $2 AND gu.global_role = 'global_admin'))
|
||
ORDER BY t.started_at ASC
|
||
LIMIT $3
|
||
`
|
||
if err := s.db.SelectContext(ctx, &out, q, sessionID, callerID, limit); err != nil {
|
||
return nil, fmt.Errorf("paliadin: list history: %w", err)
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
const (
	// MaxPrimerTurns limits how many prior exchanges the crash-recovery
	// primer replays into a fresh tmux pane. One exchange is a (user,
	// assistant) pair, so the prompt grows by roughly twice this many
	// lines plus the primer scaffolding. Five exchanges are enough to
	// establish thread continuity ("we were just discussing the Acme
	// project") without blowing out the prompt budget.
	MaxPrimerTurns = 5

	// MaxPrimerCharsPerSide limits the length of the user_message and of
	// the response that each exchange contributes to the primer. Long
	// answers from earlier turns are truncated with an ellipsis so that a
	// runaway brick of text cannot dominate the primer block.
	MaxPrimerCharsPerSide = 600
)
|
||
|
||
// buildPrimerIfFresh assembles the `[primer …][/primer]` block that
|
||
// gets prepended to the user envelope when the tmux pane was just
|
||
// created (or is unreachable for any other reason and we expect Claude
|
||
// to lack context). Returns "" when:
|
||
//
|
||
// - isFresh=false (existing pane has the conversation in memory)
|
||
// - no req.SessionID (legacy turn — nothing to recover)
|
||
// - the DB has no prior turns for this session (genuinely first turn)
|
||
// - the lookup itself errors (we degrade silently rather than block
|
||
// the user's actual question)
|
||
//
|
||
// The format SKILL.md parses:
|
||
//
|
||
// [primer last=<N>]
|
||
// U: <user message>
|
||
// A: <assistant response>
|
||
// …
|
||
// [/primer]
|
||
//
|
||
// SKILL.md treats the primer as authoritative recap, not as questions
|
||
// to re-answer. See ~/.claude/skills/paliadin/SKILL.md for the
|
||
// behaviour contract.
|
||
func (s *paliadinDB) buildPrimerIfFresh(ctx context.Context, isFresh bool, req TurnRequest) string {
|
||
if !isFresh || req.SessionID == "" {
|
||
return ""
|
||
}
|
||
rows, err := s.ListHistoryForSession(ctx, req.UserID, req.SessionID, MaxPrimerTurns)
|
||
if err != nil {
|
||
// Log + degrade silently. The user's actual question still gets
|
||
// sent; they just lose the conversation continuity for this one
|
||
// turn.
|
||
log.Printf("paliadin: primer history lookup: %v", err)
|
||
return ""
|
||
}
|
||
if len(rows) == 0 {
|
||
return ""
|
||
}
|
||
// rows are oldest → newest. Keep the newest MaxPrimerTurns; for the
|
||
// recovery use-case more recent context matters more.
|
||
if len(rows) > MaxPrimerTurns {
|
||
rows = rows[len(rows)-MaxPrimerTurns:]
|
||
}
|
||
|
||
var b strings.Builder
|
||
fmt.Fprintf(&b, "[primer last=%d] ", len(rows))
|
||
for _, row := range rows {
|
||
userMsg := truncateForPrimer(row.UserMessage)
|
||
b.WriteString("U: ")
|
||
b.WriteString(userMsg)
|
||
b.WriteString(" \\n ")
|
||
if row.Response != nil && *row.Response != "" {
|
||
assistantMsg := truncateForPrimer(*row.Response)
|
||
b.WriteString("A: ")
|
||
b.WriteString(assistantMsg)
|
||
b.WriteString(" \\n ")
|
||
}
|
||
}
|
||
b.WriteString("[/primer] ")
|
||
return b.String()
|
||
}
|
||
|
||
// truncateForPrimer normalises a message for the primer block: strips
|
||
// newlines (envelope is a single-line keystroke), collapses repeated
|
||
// whitespace, and truncates with an ellipsis when over the per-side
|
||
// cap. The output stays single-line so the tmux send-keys command
|
||
// doesn't fragment it.
|
||
func truncateForPrimer(s string) string {
|
||
s = strings.ReplaceAll(s, "\r", " ")
|
||
s = strings.ReplaceAll(s, "\n", " ")
|
||
// Collapse repeated whitespace.
|
||
for strings.Contains(s, " ") {
|
||
s = strings.ReplaceAll(s, " ", " ")
|
||
}
|
||
s = strings.TrimSpace(s)
|
||
if len(s) > MaxPrimerCharsPerSide {
|
||
s = s[:MaxPrimerCharsPerSide] + "…"
|
||
}
|
||
return s
|
||
}
|
||
|
||
// PaliadinStats holds the aggregate figures displayed on /admin/paliadin.
type PaliadinStats struct {
	TotalTurns       int                   `json:"total_turns"`
	TurnsLast7Days   int                   `json:"turns_last_7_days"`
	MedianDurationMS int                   `json:"median_duration_ms"`
	P90DurationMS    int                   `json:"p90_duration_ms"`
	ToolUseRate      float64               `json:"tool_use_rate"` // 0..1
	AbandonRate      float64               `json:"abandon_rate"`  // 0..1
	ByClassifier     map[string]int        `json:"by_classifier"` // tag → count
	DailyCounts      []PaliadinDailyCount  `json:"daily_counts"`  // last 30 days
	TopPrompts       []PaliadinPromptCount `json:"top_prompts"`   // most-frequent normalised prompts
}

// PaliadinDailyCount is one point of the per-day turn series.
type PaliadinDailyCount struct {
	Day   string `db:"day"   json:"day"` // YYYY-MM-DD
	Count int    `db:"count" json:"count"`
}

// PaliadinPromptCount pairs a normalised prompt with how often it was
// asked.
type PaliadinPromptCount struct {
	Prompt string `db:"prompt" json:"prompt"`
	Count  int    `db:"count"  json:"count"`
}
|
||
|
||
// Stats computes the dashboard aggregate. global_admin sees everything;
|
||
// everyone else sees their own slice (PoC has only m, but the policy
|
||
// matches RLS on the table).
|
||
func (s *paliadinDB) Stats(ctx context.Context, callerID uuid.UUID) (*PaliadinStats, error) {
|
||
stats := &PaliadinStats{
|
||
ByClassifier: map[string]int{},
|
||
DailyCounts: []PaliadinDailyCount{},
|
||
TopPrompts: []PaliadinPromptCount{},
|
||
}
|
||
|
||
// Visibility predicate: caller's own rows OR all rows if global_admin.
|
||
visible := `(user_id = $1 OR EXISTS (SELECT 1 FROM paliad.users u WHERE u.id = $1 AND u.global_role = 'global_admin'))`
|
||
|
||
// Total + 7-day count.
|
||
if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
|
||
SELECT COUNT(*),
|
||
COUNT(*) FILTER (WHERE started_at >= now() - interval '7 days')
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
`, visible), callerID).Scan(&stats.TotalTurns, &stats.TurnsLast7Days); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats totals: %w", err)
|
||
}
|
||
|
||
if stats.TotalTurns == 0 {
|
||
return stats, nil
|
||
}
|
||
|
||
// Duration percentiles. Skip rows still in flight (duration_ms NULL).
|
||
if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
|
||
SELECT COALESCE(percentile_cont(0.5) WITHIN GROUP (ORDER BY duration_ms), 0)::int,
|
||
COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY duration_ms), 0)::int
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s AND duration_ms IS NOT NULL
|
||
`, visible), callerID).Scan(&stats.MedianDurationMS, &stats.P90DurationMS); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats percentiles: %w", err)
|
||
}
|
||
|
||
// Tool-use + abandon rates.
|
||
var toolUsedTurns, abandonedTurns int
|
||
if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
|
||
SELECT COUNT(*) FILTER (WHERE array_length(used_tools, 1) > 0),
|
||
COUNT(*) FILTER (WHERE abandoned = true)
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
`, visible), callerID).Scan(&toolUsedTurns, &abandonedTurns); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats rates: %w", err)
|
||
}
|
||
stats.ToolUseRate = float64(toolUsedTurns) / float64(stats.TotalTurns)
|
||
stats.AbandonRate = float64(abandonedTurns) / float64(stats.TotalTurns)
|
||
|
||
// Histogram by classifier_tag.
|
||
rows, err := s.db.QueryxContext(ctx, fmt.Sprintf(`
|
||
SELECT COALESCE(classifier_tag, 'untagged') AS tag, COUNT(*) AS n
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
GROUP BY tag
|
||
`, visible), callerID)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats classifier: %w", err)
|
||
}
|
||
defer rows.Close()
|
||
for rows.Next() {
|
||
var tag string
|
||
var n int
|
||
if err := rows.Scan(&tag, &n); err != nil {
|
||
return nil, err
|
||
}
|
||
stats.ByClassifier[tag] = n
|
||
}
|
||
|
||
// Daily counts (last 30 days).
|
||
if err := s.db.SelectContext(ctx, &stats.DailyCounts, fmt.Sprintf(`
|
||
SELECT to_char(date_trunc('day', started_at), 'YYYY-MM-DD') AS day,
|
||
COUNT(*) AS count
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
AND started_at >= now() - interval '30 days'
|
||
GROUP BY day
|
||
ORDER BY day ASC
|
||
`, visible), callerID); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats daily: %w", err)
|
||
}
|
||
|
||
// Top prompts (normalised: lowercase + collapse whitespace + trim).
|
||
if err := s.db.SelectContext(ctx, &stats.TopPrompts, fmt.Sprintf(`
|
||
SELECT trim(regexp_replace(lower(user_message), '\s+', ' ', 'g')) AS prompt,
|
||
COUNT(*) AS count
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
GROUP BY prompt
|
||
ORDER BY count DESC, prompt ASC
|
||
LIMIT 10
|
||
`, visible), callerID); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats top prompts: %w", err)
|
||
}
|
||
|
||
return stats, nil
|
||
}
|
||
|
||
// =============================================================================
|
||
// tmux orchestration — adapted from mVoice/server.py:250-380.
|
||
// =============================================================================
|
||
|
||
// ensurePane returns the tmux target ("session:window-idx") of the live
|
||
// Claude pane for this user, creating both session and window if
|
||
// missing. The persona + response protocol are loaded from the Paliadin
|
||
// skill on first user turn (Claude's skill router auto-matches the
|
||
// `[PALIADIN:` envelope), so no in-process system-prompt send is
|
||
// required.
|
||
//
|
||
// The second return value (isFresh) is true when the pane was just now
|
||
// created (no prior @paliadin-scope=chat window existed). RunTurn uses
|
||
// this signal to prime the new pane with prior conversation context
|
||
// from paliad.paliadin_turns so a tmux/mRiver reboot doesn't strand
|
||
// users with a Claude that has no memory.
|
||
func (s *LocalPaliadinService) ensurePane(ctx context.Context, userID uuid.UUID) (string, bool, error) {
|
||
session := s.sessionNameFor(userID)
|
||
|
||
s.mu.Lock()
|
||
defer s.mu.Unlock()
|
||
|
||
// Cheap path: cached target still alive? Reuse.
|
||
if cached, ok := s.panes[session]; ok && cached != "" && s.paneAlive(ctx, cached) {
|
||
return cached, false, nil
|
||
}
|
||
|
||
// Ensure session.
|
||
if err := runTmux(ctx, "has-session", "-t", session); err != nil {
|
||
// Create detached.
|
||
if err := runTmux(ctx, "new-session", "-d", "-s", session); err != nil {
|
||
return "", false, fmt.Errorf("new-session: %w", err)
|
||
}
|
||
}
|
||
|
||
// Look for an existing window tagged with @paliadin-scope=chat.
|
||
if existing := s.findChatWindow(ctx, session); existing != "" {
|
||
s.panes[session] = existing
|
||
return existing, false, nil
|
||
}
|
||
|
||
// No window — create one running `claude` in a fresh pane. Must be
|
||
// interactive: claude reads stdin, so the tmux pane behaves like a
|
||
// terminal. We use `new-window -P -F` to print the new index back.
|
||
out, err := runTmuxOut(ctx, "new-window", "-t", session,
|
||
"-n", "claude-paliadin",
|
||
"-P", "-F", "#{window_index}",
|
||
"claude")
|
||
if err != nil {
|
||
return "", false, fmt.Errorf("new-window claude: %w", err)
|
||
}
|
||
idx := strings.TrimSpace(out)
|
||
target := fmt.Sprintf("%s:%s", session, idx)
|
||
|
||
// Wait for Claude's prompt indicator. Claude Code's interactive
|
||
// prompt rendering varies but always settles into a state where the
|
||
// pane has a "❯" prompt glyph or "│" sidebar visible. We give it
|
||
// 30 s, which is generous.
|
||
if err := s.waitForPaneReady(ctx, target, 30*time.Second); err != nil {
|
||
return "", false, fmt.Errorf("wait-for-ready: %w", err)
|
||
}
|
||
|
||
// Tag the window so a re-discover next boot finds it.
|
||
_ = runTmux(ctx, "set-window-option", "-t", target, "@paliadin-scope", "chat")
|
||
_ = runTmux(ctx, "set-window-option", "-t", target, "@fix-name", "claude-paliadin")
|
||
|
||
s.panes[session] = target
|
||
return target, true, nil
|
||
}
|
||
|
||
func (s *LocalPaliadinService) findChatWindow(ctx context.Context, session string) string {
|
||
out, err := runTmuxOut(ctx, "list-windows", "-t", session,
|
||
"-F", "#{window_index}")
|
||
if err != nil {
|
||
return ""
|
||
}
|
||
for _, idx := range strings.Fields(out) {
|
||
target := fmt.Sprintf("%s:%s", session, idx)
|
||
scope, err := runTmuxOut(ctx, "show-window-option",
|
||
"-t", target, "-v", "@paliadin-scope")
|
||
if err == nil && strings.TrimSpace(scope) == "chat" {
|
||
return target
|
||
}
|
||
}
|
||
return ""
|
||
}
|
||
|
||
func (s *LocalPaliadinService) paneAlive(ctx context.Context, target string) bool {
|
||
if err := runTmux(ctx, "has-session", "-t", target); err != nil {
|
||
return false
|
||
}
|
||
return true
|
||
}
|
||
|
||
func (s *LocalPaliadinService) waitForPaneReady(ctx context.Context, target string, timeout time.Duration) error {
|
||
deadline := time.Now().Add(timeout)
|
||
for time.Now().Before(deadline) {
|
||
select {
|
||
case <-ctx.Done():
|
||
return ctx.Err()
|
||
default:
|
||
}
|
||
out, err := runTmuxOut(ctx, "capture-pane", "-t", target, "-p")
|
||
if err == nil && (strings.Contains(out, "❯") || strings.Contains(out, "│")) {
|
||
return nil
|
||
}
|
||
time.Sleep(500 * time.Millisecond)
|
||
}
|
||
return fmt.Errorf("pane %s not ready within %s", target, timeout)
|
||
}
|
||
|
||
func (s *LocalPaliadinService) sendToPane(ctx context.Context, target, msg string) error {
|
||
// `-l` sends the message literally (no key parsing) — necessary so
|
||
// our prompt's special characters don't get interpreted.
|
||
if err := runTmux(ctx, "send-keys", "-t", target, "-l", msg); err != nil {
|
||
return err
|
||
}
|
||
// Settle delay between the literal paste and the Enter. Claude Code's
|
||
// TUI debounces keyboard input; if Enter lands while the paste is
|
||
// still being absorbed, the carriage-return collapses into the input
|
||
// buffer as a literal newline character instead of registering as a
|
||
// "submit" gesture, leaving the prompt typed but unsubmitted (m's
|
||
// dogfood 2026-05-08 20:35: "lacking an enter key... or too fast").
|
||
// 200ms is below the human-perceptible threshold but well above
|
||
// tmux's pty flush window. Mirrors scripts/paliadin-shim:send_to_pane.
|
||
select {
|
||
case <-ctx.Done():
|
||
return ctx.Err()
|
||
case <-time.After(200 * time.Millisecond):
|
||
}
|
||
// Trailing Enter. tmux send-keys treats "Enter" as a special key name.
|
||
if err := runTmux(ctx, "send-keys", "-t", target, "Enter"); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// StartJanitor launches the late-response watcher. Idempotent — calling
|
||
// it more than once is a no-op. The watcher scans responseDir every
|
||
// janitorInterval; when it finds a `<turn_id>.txt` file whose row has
|
||
// no response yet (typically because RunTurn already returned with
|
||
// error_code='timeout'), it parses the file, populates the row, and
|
||
// removes the file. Without this, late responses written by Claude
|
||
// after the 60 s HTTP poll window are stranded on disk and the chat UI
|
||
// is permanently in sync with the audit DB.
|
||
func (s *LocalPaliadinService) StartJanitor(ctx context.Context) {
|
||
s.janitorOnce.Do(func() {
|
||
go s.runJanitor(ctx)
|
||
})
|
||
}
|
||
|
||
// janitorInterval is the period of the ticker in runJanitor, i.e. how
// often the response directory is scanned for late response files.
const janitorInterval = 2 * time.Second
|
||
|
||
func (s *LocalPaliadinService) runJanitor(ctx context.Context) {
|
||
t := time.NewTicker(janitorInterval)
|
||
defer t.Stop()
|
||
for {
|
||
select {
|
||
case <-ctx.Done():
|
||
return
|
||
case <-t.C:
|
||
s.scanResponseDir(ctx)
|
||
}
|
||
}
|
||
}
|
||
|
||
func (s *LocalPaliadinService) scanResponseDir(ctx context.Context) {
|
||
entries, err := os.ReadDir(s.responseDir)
|
||
if err != nil {
|
||
return
|
||
}
|
||
for _, e := range entries {
|
||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".txt") {
|
||
continue
|
||
}
|
||
name := strings.TrimSuffix(e.Name(), ".txt")
|
||
turnID, err := uuid.Parse(name)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
s.processLateFile(ctx, turnID)
|
||
}
|
||
}
|
||
|
||
// processLateFile patches the row when the response arrives after the
|
||
// HTTP poll window. It refuses to touch rows whose response is already
|
||
// populated (file is stale; remove and move on) and rows whose
|
||
// finished_at is still NULL (RunTurn is the live owner of that file —
|
||
// don't race it).
|
||
func (s *LocalPaliadinService) processLateFile(ctx context.Context, turnID uuid.UUID) {
|
||
path := filepath.Join(s.responseDir, turnID.String()+".txt")
|
||
|
||
var row struct {
|
||
StartedAt time.Time `db:"started_at"`
|
||
Finished *time.Time `db:"finished_at"`
|
||
Response *string `db:"response"`
|
||
ErrorCode *string `db:"error_code"`
|
||
}
|
||
err := s.db.GetContext(ctx, &row, `
|
||
SELECT started_at, finished_at, response, error_code
|
||
FROM paliad.paliadin_turns
|
||
WHERE turn_id = $1`, turnID)
|
||
if err != nil {
|
||
// No row → orphan file from a previous boot or a different
|
||
// process. Don't risk consuming someone else's data; log via
|
||
// noop and let it sit. (We only delete files we've patched.)
|
||
return
|
||
}
|
||
// Live turn — RunTurn owns the file. Skip; pollForResponse will
|
||
// pick it up on its next 200 ms cycle.
|
||
if row.Finished == nil {
|
||
return
|
||
}
|
||
// Already complete — file is stale. Remove so the dir doesn't grow.
|
||
if row.Response != nil && *row.Response != "" {
|
||
_ = os.Remove(path)
|
||
return
|
||
}
|
||
|
||
data, err := os.ReadFile(path)
|
||
if err != nil || len(data) == 0 {
|
||
return
|
||
}
|
||
// Settle window mirrors pollForResponse — Claude may still be
|
||
// flushing the trailing bytes when we first see the file.
|
||
time.Sleep(50 * time.Millisecond)
|
||
data, err = os.ReadFile(path)
|
||
if err != nil || len(data) == 0 {
|
||
return
|
||
}
|
||
|
||
body := string(data)
|
||
cleanBody, meta := splitTrailer(body)
|
||
tokens := approxTokenCount(cleanBody)
|
||
chipCount := countChips(cleanBody)
|
||
finished := time.Now().UTC()
|
||
durationMS := int(finished.Sub(row.StartedAt) / time.Millisecond)
|
||
|
||
if err := s.completeTurnLate(ctx, turnID, finished, durationMS, cleanBody, tokens, meta, chipCount); err != nil {
|
||
log.Printf("paliadin janitor: complete late turn %s: %v", turnID, err)
|
||
return
|
||
}
|
||
_ = os.Remove(path)
|
||
log.Printf("paliadin janitor: patched late response for turn %s (%d ms after start)", turnID, durationMS)
|
||
}
|
||
|
||
// pollForResponse waits for the response file to materialise. Returns
|
||
// the file's content (and removes the file). Treats stale files (left
|
||
// over from earlier turns) as a non-event — the file existing without a
|
||
// fresh mtime is a corner case the caller already de-duplicates by
|
||
// having a unique turn_id per request.
|
||
func (s *LocalPaliadinService) pollForResponse(ctx context.Context, path string, timeout time.Duration) (string, error) {
|
||
deadline := time.Now().Add(timeout)
|
||
for time.Now().Before(deadline) {
|
||
select {
|
||
case <-ctx.Done():
|
||
return "", ctx.Err()
|
||
default:
|
||
}
|
||
data, err := os.ReadFile(path)
|
||
if err == nil && len(data) > 0 {
|
||
// Brief settle delay so we don't read mid-flush.
|
||
time.Sleep(50 * time.Millisecond)
|
||
data, _ = os.ReadFile(path)
|
||
_ = os.Remove(path)
|
||
return string(data), nil
|
||
}
|
||
time.Sleep(200 * time.Millisecond)
|
||
}
|
||
return "", fmt.Errorf("paliadin: response timeout after %s", timeout)
|
||
}
|
||
|
||
// =============================================================================
|
||
// shell / tmux helpers.
|
||
// =============================================================================
|
||
|
||
// runTmux executes `tmux <args...>` with a 5-second cap layered on top
// of ctx and throws away stdout. When the command cannot be started or
// exits non-zero, the returned error wraps the underlying cause and
// quotes both the argument list and whatever tmux printed on stderr.
func runTmux(ctx context.Context, args ...string) error {
	bounded, release := context.WithTimeout(ctx, 5*time.Second)
	defer release()

	var errOut bytes.Buffer
	tmux := exec.CommandContext(bounded, "tmux", args...)
	tmux.Stderr = &errOut

	runErr := tmux.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("tmux %s: %w (stderr: %s)", strings.Join(args, " "), runErr, errOut.String())
}
|
||
|
||
// runTmuxOut executes `tmux <args...>` with a 5-second cap layered on
// top of ctx and hands back everything the command wrote to stdout
// (used for capture-pane / list-windows / show-window-option). On
// failure it returns "" plus an error that wraps the cause and quotes
// the argument list and tmux's stderr.
func runTmuxOut(ctx context.Context, args ...string) (string, error) {
	bounded, release := context.WithTimeout(ctx, 5*time.Second)
	defer release()

	var (
		stdOut bytes.Buffer
		errOut bytes.Buffer
	)
	tmux := exec.CommandContext(bounded, "tmux", args...)
	tmux.Stdout, tmux.Stderr = &stdOut, &errOut

	runErr := tmux.Run()
	if runErr == nil {
		return stdOut.String(), nil
	}
	return "", fmt.Errorf("tmux %s: %w (stderr: %s)", strings.Join(args, " "), runErr, errOut.String())
}
|
||
|
||
// sanitiseForTmux flattens a message onto a single line and caps its
// size before it is typed into a pane.
//
// Every "\r" and "\n" is replaced by a space: `tmux send-keys -l` sends
// literally, but stray newlines inside the message would split it
// across multiple "send" actions, breaking the turn envelope.
//
// Messages longer than 8000 bytes are cut and suffixed with
// " […truncated]". The cut never lands inside a multi-byte UTF-8
// sequence: it is moved back to the start of the rune that straddles
// the limit, so valid input always yields valid output (non-ASCII text
// such as German umlauts would otherwise be corrupted at the boundary).
func sanitiseForTmux(s string) string {
	s = strings.ReplaceAll(s, "\r", " ")
	s = strings.ReplaceAll(s, "\n", " ")
	// Cap length: a runaway prompt is a footgun.
	const maxLen = 8000
	if len(s) > maxLen {
		cut := maxLen
		// s[cut] is the first byte that gets dropped. If it is a UTF-8
		// continuation byte (10xxxxxx) the cut would split a rune, so
		// step back to that rune's lead byte. A rune has at most three
		// continuation bytes, which bounds the walk even on malformed
		// input.
		for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		s = s[:cut] + " […truncated]"
	}
	return s
}
|
||
|
||
// =============================================================================
|
||
// trailer parsing.
|
||
// =============================================================================
|
||
|
||
// trailerMeta holds the fields recovered from the
// [paliadin-meta]…[/paliadin-meta] block that closes a Claude response.
// Extraction is best-effort: a field that is missing from the block
// keeps its zero value.
type trailerMeta struct {
	// UsedTools lists the non-empty, comma-separated entries of the
	// `used_tools` line.
	UsedTools []string
	// RowsSeen lists the entries of the `rows_seen` line that parse as
	// integers.
	RowsSeen []int
	// ClassifierTag is the trimmed value of the `classifier_tag` line.
	ClassifierTag string
}
|
||
|
||
// trailerRE locates the metadata trailer at the very end of a response:
// optional newlines, a `---` separator line, a `[paliadin-meta]` line,
// the block content (capture group 1; lazy, and allowed to span lines
// because of the (?s) flag) and a closing `[/paliadin-meta]` followed
// only by whitespace up to the end of the input.
var trailerRE = regexp.MustCompile(`(?s)\n*---\s*\n+\[paliadin-meta\]\s*\n(.+?)\n\[/paliadin-meta\]\s*$`)
|
||
|
||
// splitTrailer separates the meta block from the body. If no trailer is
|
||
// present, the entire input is returned as the body.
|
||
func splitTrailer(body string) (string, trailerMeta) {
|
||
body = strings.TrimRight(body, " \t\n\r")
|
||
m := trailerRE.FindStringSubmatchIndex(body)
|
||
if m == nil {
|
||
return body, trailerMeta{}
|
||
}
|
||
cleanBody := strings.TrimRight(body[:m[0]], " \t\n\r")
|
||
metaText := body[m[2]:m[3]]
|
||
return cleanBody, parseTrailer(metaText)
|
||
}
|
||
|
||
func parseTrailer(text string) trailerMeta {
|
||
out := trailerMeta{}
|
||
for _, line := range strings.Split(text, "\n") {
|
||
k, v, ok := splitFirst(strings.TrimSpace(line), ":")
|
||
if !ok {
|
||
continue
|
||
}
|
||
v = strings.TrimSpace(v)
|
||
switch strings.ToLower(strings.TrimSpace(k)) {
|
||
case "used_tools":
|
||
for _, t := range strings.Split(v, ",") {
|
||
t = strings.TrimSpace(t)
|
||
if t != "" {
|
||
out.UsedTools = append(out.UsedTools, t)
|
||
}
|
||
}
|
||
case "rows_seen":
|
||
for _, t := range strings.Split(v, ",") {
|
||
n, err := strconv.Atoi(strings.TrimSpace(t))
|
||
if err == nil {
|
||
out.RowsSeen = append(out.RowsSeen, n)
|
||
}
|
||
}
|
||
case "classifier_tag":
|
||
out.ClassifierTag = v
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
// splitFirst cuts s around the first occurrence of sep and returns the
// text before it, the text after it, and true. When sep does not occur
// in s it returns two empty strings and false.
func splitFirst(s, sep string) (string, string, bool) {
	if head, tail, found := strings.Cut(s, sep); found {
		return head, tail, true
	}
	return "", "", false
}
|
||
|
||
// approxTokenCount estimates the token count of s as the number of
// whitespace-separated words times 1.3, truncated towards zero. It is
// a coarse heuristic meant only for the dashboard's cost-trend sense;
// real token counts aren't exposed by Claude Code via tmux.
func approxTokenCount(s string) int {
	wordCount := len(strings.Fields(s))
	return int(float64(wordCount) * 1.3)
}
|
||
|
||
// chipRE recognises the chip markers embedded in a response:
// `[#<lowercase-word>-OPEN:<id>]` (for example `[#deadline-OPEN:…]` or
// `[#projekt-OPEN:…]`), where <id> consists of letters, digits, "-"
// and "_", and `[chip:<lowercase-word>:<anything but "]">]`.
var chipRE = regexp.MustCompile(`\[(?:#[a-z]+-OPEN:[A-Za-z0-9\-_]+|chip:[a-z]+:[^\]]+)\]`)

// countChips returns how many chip markers (see chipRE) occur in s.
// The audit log only needs the number, not the markers themselves.
func countChips(s string) int {
	hits := chipRE.FindAllStringIndex(s, -1)
	return len(hits)
}
|
||
|
||
// =============================================================================
|
||
// audit-row writers.
|
||
// =============================================================================
|
||
|
||
func (s *paliadinDB) insertTurnRow(ctx context.Context, t *PaliadinTurn, tctx *TurnContext) error {
|
||
// context is jsonb (migration 070). nil → SQL NULL; non-nil → JSON
|
||
// blob. Marshal once here so callers stay simple.
|
||
var ctxJSON []byte
|
||
if tctx != nil {
|
||
var err error
|
||
ctxJSON, err = json.Marshal(tctx)
|
||
if err != nil {
|
||
return fmt.Errorf("marshal turn context: %w", err)
|
||
}
|
||
}
|
||
q := `
|
||
INSERT INTO paliad.paliadin_turns (
|
||
turn_id, user_id, session_id, started_at, user_message, page_origin, context
|
||
) VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q,
|
||
t.TurnID, t.UserID, t.SessionID, t.StartedAt, t.UserMessage, t.PageOrigin, nullJSON(ctxJSON))
|
||
return err
|
||
}
|
||
|
||
// nullJSON turns a nil or zero-length byte slice into an untyped nil
// and passes any other slice through unchanged. insertTurnRow uses it
// for the jsonb context argument so that "no context" is bound as a
// true SQL NULL (per the original author's note about pq) rather than
// as an empty value.
func nullJSON(b []byte) any {
	if len(b) > 0 {
		return b
	}
	return nil
}
|
||
|
||
func (s *paliadinDB) completeTurn(ctx context.Context, turnID uuid.UUID,
|
||
finishedAt time.Time, durationMS int, response string, tokens int,
|
||
meta trailerMeta, chipCount int) error {
|
||
// used_tools and rows_seen are NOT NULL in the schema (default '{}').
|
||
// parseTrailer leaves them nil when Claude omits the trailer or the
|
||
// turn has no tool calls (casual chat). pq treats nil slices as NULL,
|
||
// so we must coerce to a non-nil empty array on every path.
|
||
usedTools := make(pq.StringArray, 0, len(meta.UsedTools))
|
||
for _, t := range meta.UsedTools {
|
||
usedTools = append(usedTools, t)
|
||
}
|
||
rowsSeen := make(pq.Int64Array, 0, len(meta.RowsSeen))
|
||
for _, n := range meta.RowsSeen {
|
||
rowsSeen = append(rowsSeen, int64(n))
|
||
}
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2,
|
||
duration_ms = $3,
|
||
response = $4,
|
||
response_tokens = $5,
|
||
used_tools = $6,
|
||
rows_seen = $7,
|
||
chip_count = $8,
|
||
classifier_tag = $9
|
||
WHERE turn_id = $1
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q,
|
||
turnID, finishedAt, durationMS, response, tokens,
|
||
usedTools, rowsSeen, chipCount,
|
||
optionalString(meta.ClassifierTag))
|
||
return err
|
||
}
|
||
|
||
// completeTurnLate is completeTurn's twin for the janitor path: the
|
||
// row already has finished_at + error_code='timeout' (or similar); we
|
||
// stamp `error_code='late'` so the chat UI can render a marker, refresh
|
||
// finished_at + duration_ms to the actual completion time, and fill in
|
||
// the response payload. Guarded with `WHERE response IS NULL` so we
|
||
// can never overwrite a real response if there's a race.
|
||
func (s *paliadinDB) completeTurnLate(ctx context.Context, turnID uuid.UUID,
|
||
finishedAt time.Time, durationMS int, response string, tokens int,
|
||
meta trailerMeta, chipCount int) error {
|
||
usedTools := make(pq.StringArray, 0, len(meta.UsedTools))
|
||
for _, t := range meta.UsedTools {
|
||
usedTools = append(usedTools, t)
|
||
}
|
||
rowsSeen := make(pq.Int64Array, 0, len(meta.RowsSeen))
|
||
for _, n := range meta.RowsSeen {
|
||
rowsSeen = append(rowsSeen, int64(n))
|
||
}
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2,
|
||
duration_ms = $3,
|
||
response = $4,
|
||
response_tokens = $5,
|
||
used_tools = $6,
|
||
rows_seen = $7,
|
||
chip_count = $8,
|
||
classifier_tag = $9,
|
||
error_code = 'late'
|
||
WHERE turn_id = $1 AND response IS NULL
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q,
|
||
turnID, finishedAt, durationMS, response, tokens,
|
||
usedTools, rowsSeen, chipCount,
|
||
optionalString(meta.ClassifierTag))
|
||
return err
|
||
}
|
||
|
||
func (s *paliadinDB) markTurnError(ctx context.Context, turnID uuid.UUID, code string) error {
|
||
finished := time.Now().UTC()
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2, error_code = $3
|
||
WHERE turn_id = $1 AND finished_at IS NULL
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q, turnID, finished, code)
|
||
return err
|
||
}
|
||
|
||
func (s *paliadinDB) markTurnAbandonedOrError(ctx context.Context, turnID uuid.UUID, code string, abandoned bool) error {
|
||
finished := time.Now().UTC()
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2, error_code = $3, abandoned = $4
|
||
WHERE turn_id = $1 AND finished_at IS NULL
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q, turnID, finished, code, abandoned)
|
||
return err
|
||
}
|
||
|
||
// optionalString maps the empty string to nil and any other string to
// a pointer to a copy of it. The audit-row writers use it to bind an
// absent classifier tag as NULL instead of "".
func optionalString(s string) *string {
	if s != "" {
		return &s
	}
	return nil
}
|
||
|
||
// Compile-time guard: asserts that sql.ErrNoRows is still assignable to
// error. As a side effect it references the database/sql package from
// this file.
|
||
var _ error = sql.ErrNoRows
|