Phase B step 1 of the Tailscale-SSH route to mRiver. Splits the existing local-tmux PoC into a Paliadin interface with two implementations; the remote-SSH backend lands in a follow-up commit (paliadin_remote.go). Surface: - Paliadin interface — RunTurn, ResetSession, ListRecentTurns, Stats, IsOwner. The handler at internal/handlers/paliadin.go now talks to this instead of the concrete struct. - paliadinDB — embedded base type carrying the audit-table I/O (insertTurnRow, completeTurn, markTurnError, markTurnAbandonedOrError) plus the read-side queries (IsOwner, ListRecentTurns, Stats). Both Local and Remote impls inherit these by embedding paliadinDB so the remote path doesn't have to duplicate any DB code. - LocalPaliadinService — the renamed PoC backend. Identical behaviour to the previous PaliadinService; only the type name and method receivers change. Method receivers split: tmux-specific operations (RunTurn, ResetSession, ensurePane, sendToPane, pollForResponse, etc.) stay on *LocalPaliadinService; DB-only operations promote to *paliadinDB. Wiring: - internal/handlers/handlers.go — Paliadin field becomes the interface type; Register() unchanged. - cmd/server/main.go — calls NewLocalPaliadinService instead of NewPaliadinService. The remote-vs-local switch on PALIADIN_REMOTE_HOST lands in B5. Tests in paliadin_test.go all green — they test package-level functions (splitTrailer, countChips, approxTokenCount, sanitiseForTmux, PaliadinOwnerEmail) and don't touch the renamed struct. No behaviour change on the local-tmux path. Refs m/paliad#12
782 lines
27 KiB
Go
782 lines
27 KiB
Go
package services
|
||
|
||
// Paliadin — the in-app AI buddy. Two implementations of the same
|
||
// interface, picked at boot time (see cmd/server/main.go):
|
||
//
|
||
// - LocalPaliadinService — talks to a `claude` CLI in a local tmux
|
||
// session. The PoC path (t-paliad-146); used on m's laptop.
|
||
// - RemotePaliadinService — shells out to ssh on mRiver where the
|
||
// long-lived tmux+claude pane lives. The prod path (t-paliad-151);
|
||
// used by the paliad.de Dokploy container, which has no `claude`
|
||
// CLI of its own.
|
||
//
|
||
// Designs:
|
||
// - docs/design-paliadin-2026-05-07.md (PoC architecture)
|
||
// - docs/design-paliadin-tailscale-ssh-2026-05-07.md (remote routing)
|
||
//
|
||
// Both implementations share the audit-table I/O (paliadinDB) and the
|
||
// trailer parser. The conversation state (turn ordering, response file
|
||
// polling) is split: Local owns the tmux pane directly; Remote delegates
|
||
// to the paliadin-shim on mRiver and reads the file there.
|
||
|
||
import (
|
||
"bytes"
|
||
"context"
|
||
"database/sql"
|
||
"errors"
|
||
"fmt"
|
||
"log"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/google/uuid"
|
||
"github.com/jmoiron/sqlx"
|
||
"github.com/lib/pq"
|
||
)
|
||
|
||
// PaliadinOwnerEmail identifies the single account permitted to use the
// Paliadin PoC. It is deliberately a compile-time constant rather than
// configuration, so the gate cannot be widened through a deploy env var.
// Multi-user access is deferred until production v1 ships behind its own
// auth model.
//
// The value mirrors the seed in migration 023 (m's job_title row). Should
// m's email ever rotate, this constant has to rotate with it; nothing else
// enables Paliadin.
const PaliadinOwnerEmail = "matthias.siebels@hoganlovells.com"
|
||
|
||
// Paliadin is the interface every Paliadin backend implements. Two
|
||
// production implementations: LocalPaliadinService (local tmux+claude)
|
||
// and RemotePaliadinService (ssh+paliadin-shim on mRiver). A
|
||
// DisabledPaliadinService stub is constructed when neither is available
|
||
// so callers don't have to nil-check on every entry point.
|
||
type Paliadin interface {
|
||
RunTurn(ctx context.Context, req TurnRequest) (*TurnResult, error)
|
||
ResetSession(ctx context.Context) error
|
||
ListRecentTurns(ctx context.Context, callerID uuid.UUID, limit int) ([]PaliadinTurn, error)
|
||
Stats(ctx context.Context, callerID uuid.UUID) (*PaliadinStats, error)
|
||
IsOwner(ctx context.Context, userID uuid.UUID) (bool, error)
|
||
}
|
||
|
||
// paliadinDB is the audit-table read/write surface shared by every
|
||
// Paliadin implementation. Embedded in LocalPaliadinService and
|
||
// RemotePaliadinService so they inherit IsOwner / ListRecentTurns /
|
||
// Stats and the per-turn row writers without duplication.
|
||
type paliadinDB struct {
|
||
db *sqlx.DB
|
||
users *UserService
|
||
}
|
||
|
||
// LocalPaliadinService runs the local tmux+claude PoC (t-paliad-146).
|
||
// Hardcoded single-user, single-tmux-window scope. Used on m's laptop;
|
||
// not deployed to prod (the Dokploy container has no `claude` CLI —
|
||
// see RemotePaliadinService for that path).
|
||
type LocalPaliadinService struct {
|
||
paliadinDB
|
||
tmuxSession string
|
||
responseDir string
|
||
|
||
// Cached pane target ("session:window-idx") once the voice window is
|
||
// either discovered or created. Reset to "" if the pane dies.
|
||
mu sync.Mutex
|
||
paneTarget string
|
||
|
||
// Single in-flight turn at a time. PoC scope — one user (m), serialised
|
||
// by a session-level mutex. Production v1 would queue / fan out.
|
||
turnMu sync.Mutex
|
||
}
|
||
|
||
// IsOwner returns true when the given user_id corresponds to m's
|
||
// account (the only Paliadin PoC user). Resolves via paliad.users.email
|
||
// rather than caching a UUID so a DB rebuild that reassigns auth UUIDs
|
||
// doesn't strand the gate.
|
||
//
|
||
// Returns (false, nil) for any other user — including unknown UUIDs and
|
||
// users without an email row. Errors only on DB failure.
|
||
func (s *paliadinDB) IsOwner(ctx context.Context, userID uuid.UUID) (bool, error) {
|
||
var email string
|
||
err := s.db.QueryRowxContext(ctx,
|
||
`SELECT email FROM paliad.users WHERE id = $1`, userID).Scan(&email)
|
||
if errors.Is(err, sql.ErrNoRows) {
|
||
return false, nil
|
||
}
|
||
if err != nil {
|
||
return false, fmt.Errorf("paliadin: lookup owner: %w", err)
|
||
}
|
||
return strings.EqualFold(email, PaliadinOwnerEmail), nil
|
||
}
|
||
|
||
// NewLocalPaliadinService wires the local-tmux PoC backend. Falls back
|
||
// to default tmux session + response dir when env vars are empty.
|
||
func NewLocalPaliadinService(db *sqlx.DB, users *UserService, tmuxSession, responseDir string) *LocalPaliadinService {
|
||
if tmuxSession == "" {
|
||
tmuxSession = "paliad-paliadin"
|
||
}
|
||
if responseDir == "" {
|
||
responseDir = "/tmp/paliadin"
|
||
}
|
||
return &LocalPaliadinService{
|
||
paliadinDB: paliadinDB{db: db, users: users},
|
||
tmuxSession: tmuxSession,
|
||
responseDir: responseDir,
|
||
}
|
||
}
|
||
|
||
// PaliadinTurn is the audit row.
|
||
type PaliadinTurn struct {
|
||
TurnID uuid.UUID `db:"turn_id" json:"turn_id"`
|
||
UserID uuid.UUID `db:"user_id" json:"user_id"`
|
||
SessionID string `db:"session_id" json:"session_id"`
|
||
StartedAt time.Time `db:"started_at" json:"started_at"`
|
||
FinishedAt *time.Time `db:"finished_at" json:"finished_at,omitempty"`
|
||
DurationMS *int `db:"duration_ms" json:"duration_ms,omitempty"`
|
||
UserMessage string `db:"user_message" json:"user_message"`
|
||
Response *string `db:"response" json:"response,omitempty"`
|
||
ResponseTokens *int `db:"response_tokens" json:"response_tokens,omitempty"`
|
||
UsedTools pq.StringArray `db:"used_tools" json:"used_tools"`
|
||
RowsSeen pq.Int64Array `db:"rows_seen" json:"rows_seen"`
|
||
ChipCount int `db:"chip_count" json:"chip_count"`
|
||
Abandoned bool `db:"abandoned" json:"abandoned"`
|
||
PageOrigin *string `db:"page_origin" json:"page_origin,omitempty"`
|
||
ErrorCode *string `db:"error_code" json:"error_code,omitempty"`
|
||
ClassifierTag *string `db:"classifier_tag" json:"classifier_tag,omitempty"`
|
||
}
|
||
|
||
// TurnRequest is what the handler passes to RunTurn.
|
||
type TurnRequest struct {
|
||
UserID uuid.UUID
|
||
SessionID string
|
||
UserMessage string
|
||
PageOrigin string // empty when unknown
|
||
}
|
||
|
||
// TurnResult is what RunTurn returns to the handler.
|
||
type TurnResult struct {
|
||
TurnID uuid.UUID
|
||
Response string // body without [paliadin-meta] trailer
|
||
UsedTools []string
|
||
RowsSeen []int
|
||
ChipCount int
|
||
ClassifierTag string
|
||
DurationMS int
|
||
}
|
||
|
||
// Sentinel errors shared by the Paliadin backends.
var (
	// ErrPaliadinDisabled signals that the service is wired up but
	// switched off. Handlers map it to 503.
	ErrPaliadinDisabled = errors.New("paliadin: disabled")

	// ErrTmuxUnavailable signals that tmux could not be reached (binary
	// missing, session unreachable, etc.). Handlers map it to 503 with a
	// hint.
	ErrTmuxUnavailable = errors.New("paliadin: tmux unavailable")
)
|
||
|
||
// RunTurn executes one full Q&A round. Blocks until Claude has written
|
||
// the response file or we time out (default 60 s). Writes the audit row
|
||
// in both success + error paths.
|
||
//
|
||
// PoC: serialised. The package-level turnMu enforces "one at a time".
|
||
// m is the only user, so this is fine.
|
||
func (s *LocalPaliadinService) RunTurn(ctx context.Context, req TurnRequest) (*TurnResult, error) {
|
||
s.turnMu.Lock()
|
||
defer s.turnMu.Unlock()
|
||
|
||
turnID := uuid.New()
|
||
startedAt := time.Now().UTC()
|
||
|
||
// Audit row — written *first* so a crash mid-turn still leaves traces.
|
||
if err := s.insertTurnRow(ctx, &PaliadinTurn{
|
||
TurnID: turnID,
|
||
UserID: req.UserID,
|
||
SessionID: req.SessionID,
|
||
StartedAt: startedAt,
|
||
UserMessage: req.UserMessage,
|
||
PageOrigin: optionalString(req.PageOrigin),
|
||
}); err != nil {
|
||
return nil, fmt.Errorf("paliadin: insert turn row: %w", err)
|
||
}
|
||
|
||
// Ensure tmux session + Claude pane.
|
||
target, err := s.ensurePane(ctx)
|
||
if err != nil {
|
||
_ = s.markTurnError(ctx, turnID, "tmux_unresponsive")
|
||
return nil, fmt.Errorf("%w: %v", ErrTmuxUnavailable, err)
|
||
}
|
||
|
||
// Make sure the response dir exists.
|
||
if err := os.MkdirAll(s.responseDir, 0o755); err != nil {
|
||
_ = s.markTurnError(ctx, turnID, "tmux_unresponsive")
|
||
return nil, fmt.Errorf("paliadin: mkdir response dir: %w", err)
|
||
}
|
||
|
||
// Send the framed prompt. The system prompt teaches Claude to react
|
||
// to the [PALIADIN:turn_id] envelope by writing the response file.
|
||
envelope := fmt.Sprintf("[PALIADIN:%s] %s", turnID, sanitiseForTmux(req.UserMessage))
|
||
if err := s.sendToPane(ctx, target, envelope); err != nil {
|
||
_ = s.markTurnError(ctx, turnID, "tmux_unresponsive")
|
||
return nil, fmt.Errorf("%w: send prompt: %v", ErrTmuxUnavailable, err)
|
||
}
|
||
|
||
// Poll for the response file. Fixed 60 s timeout; abort early if the
|
||
// caller's context is cancelled (e.g. user clicked Stop).
|
||
respPath := filepath.Join(s.responseDir, turnID.String()+".txt")
|
||
body, err := s.pollForResponse(ctx, respPath, 60*time.Second)
|
||
if err != nil {
|
||
ec := "timeout"
|
||
if errors.Is(err, context.Canceled) {
|
||
ec = "user_aborted"
|
||
}
|
||
_ = s.markTurnAbandonedOrError(ctx, turnID, ec, ec == "user_aborted")
|
||
return nil, err
|
||
}
|
||
|
||
// Strip + parse the [paliadin-meta] trailer. Best-effort: the prompt
|
||
// instructs Claude to emit it but the PoC's monitoring is precisely
|
||
// what tells us how reliable that is in practice.
|
||
cleanBody, meta := splitTrailer(body)
|
||
tokens := approxTokenCount(cleanBody)
|
||
chipCount := countChips(cleanBody)
|
||
finished := time.Now().UTC()
|
||
durationMS := int(finished.Sub(startedAt) / time.Millisecond)
|
||
|
||
// Write the result back into the audit row.
|
||
if err := s.completeTurn(ctx, turnID, finished, durationMS, cleanBody, tokens, meta, chipCount); err != nil {
|
||
log.Printf("paliadin: complete turn %s: %v", turnID, err)
|
||
// Don't fail the user-facing request on audit-row write errors —
|
||
// the response is real even if the bookkeeping is broken.
|
||
}
|
||
|
||
return &TurnResult{
|
||
TurnID: turnID,
|
||
Response: cleanBody,
|
||
UsedTools: meta.UsedTools,
|
||
RowsSeen: meta.RowsSeen,
|
||
ChipCount: chipCount,
|
||
ClassifierTag: meta.ClassifierTag,
|
||
DurationMS: durationMS,
|
||
}, nil
|
||
}
|
||
|
||
// ResetSession sends `/clear` to the Claude pane so the next turn starts
|
||
// from a clean conversation. Used by the "New conversation" button.
|
||
func (s *LocalPaliadinService) ResetSession(ctx context.Context) error {
|
||
s.mu.Lock()
|
||
target := s.paneTarget
|
||
s.mu.Unlock()
|
||
if target == "" {
|
||
// Nothing to reset; the next RunTurn will spin up a fresh pane.
|
||
return nil
|
||
}
|
||
if err := s.sendToPane(ctx, target, "/clear"); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// ListRecentTurns reads the last N turns visible to the caller.
|
||
// global_admin sees everything; everyone else sees their own.
|
||
func (s *paliadinDB) ListRecentTurns(ctx context.Context, callerID uuid.UUID, limit int) ([]PaliadinTurn, error) {
|
||
if limit <= 0 || limit > 200 {
|
||
limit = 50
|
||
}
|
||
out := make([]PaliadinTurn, 0, limit)
|
||
q := `
|
||
SELECT turn_id, user_id, session_id, started_at, finished_at, duration_ms,
|
||
user_message, response, response_tokens, used_tools, rows_seen,
|
||
chip_count, abandoned, page_origin, error_code, classifier_tag
|
||
FROM paliad.paliadin_turns
|
||
WHERE user_id = $1
|
||
OR EXISTS (SELECT 1 FROM paliad.users u
|
||
WHERE u.id = $1 AND u.global_role = 'global_admin')
|
||
ORDER BY started_at DESC
|
||
LIMIT $2
|
||
`
|
||
if err := s.db.SelectContext(ctx, &out, q, callerID, limit); err != nil {
|
||
return nil, fmt.Errorf("paliadin: list turns: %w", err)
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
// PaliadinStats is the aggregate view shown on /admin/paliadin.
|
||
type PaliadinStats struct {
|
||
TotalTurns int `json:"total_turns"`
|
||
TurnsLast7Days int `json:"turns_last_7_days"`
|
||
MedianDurationMS int `json:"median_duration_ms"`
|
||
P90DurationMS int `json:"p90_duration_ms"`
|
||
ToolUseRate float64 `json:"tool_use_rate"` // 0..1
|
||
AbandonRate float64 `json:"abandon_rate"` // 0..1
|
||
ByClassifier map[string]int `json:"by_classifier"` // tag → count
|
||
DailyCounts []PaliadinDailyCount `json:"daily_counts"` // last 30 days
|
||
TopPrompts []PaliadinPromptCount `json:"top_prompts"` // most-frequent normalised prompts
|
||
}
|
||
|
||
// PaliadinDailyCount is one bucket of the per-day turn histogram in
// PaliadinStats: a calendar day (formatted YYYY-MM-DD) and the number of
// turns started on it.
type PaliadinDailyCount struct {
	Day   string `db:"day" json:"day"`
	Count int    `db:"count" json:"count"`
}
|
||
|
||
// PaliadinPromptCount pairs a normalised prompt with the number of turns
// that used it; PaliadinStats.TopPrompts is a list of these.
type PaliadinPromptCount struct {
	Prompt string `db:"prompt" json:"prompt"`
	Count  int    `db:"count" json:"count"`
}
|
||
|
||
// Stats computes the dashboard aggregate. global_admin sees everything;
|
||
// everyone else sees their own slice (PoC has only m, but the policy
|
||
// matches RLS on the table).
|
||
func (s *paliadinDB) Stats(ctx context.Context, callerID uuid.UUID) (*PaliadinStats, error) {
|
||
stats := &PaliadinStats{
|
||
ByClassifier: map[string]int{},
|
||
DailyCounts: []PaliadinDailyCount{},
|
||
TopPrompts: []PaliadinPromptCount{},
|
||
}
|
||
|
||
// Visibility predicate: caller's own rows OR all rows if global_admin.
|
||
visible := `(user_id = $1 OR EXISTS (SELECT 1 FROM paliad.users u WHERE u.id = $1 AND u.global_role = 'global_admin'))`
|
||
|
||
// Total + 7-day count.
|
||
if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
|
||
SELECT COUNT(*),
|
||
COUNT(*) FILTER (WHERE started_at >= now() - interval '7 days')
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
`, visible), callerID).Scan(&stats.TotalTurns, &stats.TurnsLast7Days); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats totals: %w", err)
|
||
}
|
||
|
||
if stats.TotalTurns == 0 {
|
||
return stats, nil
|
||
}
|
||
|
||
// Duration percentiles. Skip rows still in flight (duration_ms NULL).
|
||
if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
|
||
SELECT COALESCE(percentile_cont(0.5) WITHIN GROUP (ORDER BY duration_ms), 0)::int,
|
||
COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY duration_ms), 0)::int
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s AND duration_ms IS NOT NULL
|
||
`, visible), callerID).Scan(&stats.MedianDurationMS, &stats.P90DurationMS); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats percentiles: %w", err)
|
||
}
|
||
|
||
// Tool-use + abandon rates.
|
||
var toolUsedTurns, abandonedTurns int
|
||
if err := s.db.QueryRowxContext(ctx, fmt.Sprintf(`
|
||
SELECT COUNT(*) FILTER (WHERE array_length(used_tools, 1) > 0),
|
||
COUNT(*) FILTER (WHERE abandoned = true)
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
`, visible), callerID).Scan(&toolUsedTurns, &abandonedTurns); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats rates: %w", err)
|
||
}
|
||
stats.ToolUseRate = float64(toolUsedTurns) / float64(stats.TotalTurns)
|
||
stats.AbandonRate = float64(abandonedTurns) / float64(stats.TotalTurns)
|
||
|
||
// Histogram by classifier_tag.
|
||
rows, err := s.db.QueryxContext(ctx, fmt.Sprintf(`
|
||
SELECT COALESCE(classifier_tag, 'untagged') AS tag, COUNT(*) AS n
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
GROUP BY tag
|
||
`, visible), callerID)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats classifier: %w", err)
|
||
}
|
||
defer rows.Close()
|
||
for rows.Next() {
|
||
var tag string
|
||
var n int
|
||
if err := rows.Scan(&tag, &n); err != nil {
|
||
return nil, err
|
||
}
|
||
stats.ByClassifier[tag] = n
|
||
}
|
||
|
||
// Daily counts (last 30 days).
|
||
if err := s.db.SelectContext(ctx, &stats.DailyCounts, fmt.Sprintf(`
|
||
SELECT to_char(date_trunc('day', started_at), 'YYYY-MM-DD') AS day,
|
||
COUNT(*) AS count
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
AND started_at >= now() - interval '30 days'
|
||
GROUP BY day
|
||
ORDER BY day ASC
|
||
`, visible), callerID); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats daily: %w", err)
|
||
}
|
||
|
||
// Top prompts (normalised: lowercase + collapse whitespace + trim).
|
||
if err := s.db.SelectContext(ctx, &stats.TopPrompts, fmt.Sprintf(`
|
||
SELECT trim(regexp_replace(lower(user_message), '\s+', ' ', 'g')) AS prompt,
|
||
COUNT(*) AS count
|
||
FROM paliad.paliadin_turns
|
||
WHERE %s
|
||
GROUP BY prompt
|
||
ORDER BY count DESC, prompt ASC
|
||
LIMIT 10
|
||
`, visible), callerID); err != nil {
|
||
return nil, fmt.Errorf("paliadin: stats top prompts: %w", err)
|
||
}
|
||
|
||
return stats, nil
|
||
}
|
||
|
||
// =============================================================================
|
||
// tmux orchestration — adapted from mVoice/server.py:250-380.
|
||
// =============================================================================
|
||
|
||
// ensurePane returns the tmux target ("session:window-idx") of the live
|
||
// Claude pane, creating both session and window if missing.
|
||
func (s *LocalPaliadinService) ensurePane(ctx context.Context) (string, error) {
|
||
s.mu.Lock()
|
||
defer s.mu.Unlock()
|
||
|
||
// Cheap path: if we have a cached target and it's still alive, reuse.
|
||
if s.paneTarget != "" && s.paneAlive(ctx, s.paneTarget) {
|
||
return s.paneTarget, nil
|
||
}
|
||
|
||
// Ensure session.
|
||
if err := runTmux(ctx, "has-session", "-t", s.tmuxSession); err != nil {
|
||
// Create detached.
|
||
if err := runTmux(ctx, "new-session", "-d", "-s", s.tmuxSession); err != nil {
|
||
return "", fmt.Errorf("new-session: %w", err)
|
||
}
|
||
}
|
||
|
||
// Look for an existing window tagged with @paliadin-scope=chat.
|
||
if existing := s.findChatWindow(ctx); existing != "" {
|
||
s.paneTarget = existing
|
||
return existing, nil
|
||
}
|
||
|
||
// No window — create one running `claude` in a fresh pane. Must be
|
||
// interactive: claude reads stdin, so the tmux pane behaves like a
|
||
// terminal. We use `new-window -P -F` to print the new index back.
|
||
out, err := runTmuxOut(ctx, "new-window", "-t", s.tmuxSession,
|
||
"-n", "claude-paliadin",
|
||
"-P", "-F", "#{window_index}",
|
||
"claude")
|
||
if err != nil {
|
||
return "", fmt.Errorf("new-window claude: %w", err)
|
||
}
|
||
idx := strings.TrimSpace(out)
|
||
target := fmt.Sprintf("%s:%s", s.tmuxSession, idx)
|
||
|
||
// Wait for Claude's prompt indicator. Claude Code's interactive
|
||
// prompt rendering varies but always settles into a state where the
|
||
// pane has a "❯" prompt glyph or "│" sidebar visible. We give it
|
||
// 30 s, which is generous.
|
||
if err := s.waitForPaneReady(ctx, target, 30*time.Second); err != nil {
|
||
return "", fmt.Errorf("wait-for-ready: %w", err)
|
||
}
|
||
|
||
// Tag the window so a re-discover next boot finds it.
|
||
_ = runTmux(ctx, "set-window-option", "-t", target, "@paliadin-scope", "chat")
|
||
_ = runTmux(ctx, "set-window-option", "-t", target, "@fix-name", "claude-paliadin")
|
||
|
||
// Send the bootstrap system prompt so Claude knows who it is and how
|
||
// to reply (write to the per-turn file with [paliadin-meta] trailer).
|
||
if err := s.sendToPane(ctx, target, paliadinSystemPrompt(s.responseDir)); err != nil {
|
||
return "", fmt.Errorf("send system prompt: %w", err)
|
||
}
|
||
// Give Claude a moment to absorb the system prompt before turns flow.
|
||
select {
|
||
case <-ctx.Done():
|
||
return "", ctx.Err()
|
||
case <-time.After(2 * time.Second):
|
||
}
|
||
|
||
s.paneTarget = target
|
||
return target, nil
|
||
}
|
||
|
||
func (s *LocalPaliadinService) findChatWindow(ctx context.Context) string {
|
||
out, err := runTmuxOut(ctx, "list-windows", "-t", s.tmuxSession,
|
||
"-F", "#{window_index}")
|
||
if err != nil {
|
||
return ""
|
||
}
|
||
for _, idx := range strings.Fields(out) {
|
||
target := fmt.Sprintf("%s:%s", s.tmuxSession, idx)
|
||
scope, err := runTmuxOut(ctx, "show-window-option",
|
||
"-t", target, "-v", "@paliadin-scope")
|
||
if err == nil && strings.TrimSpace(scope) == "chat" {
|
||
return target
|
||
}
|
||
}
|
||
return ""
|
||
}
|
||
|
||
func (s *LocalPaliadinService) paneAlive(ctx context.Context, target string) bool {
|
||
if err := runTmux(ctx, "has-session", "-t", target); err != nil {
|
||
return false
|
||
}
|
||
return true
|
||
}
|
||
|
||
func (s *LocalPaliadinService) waitForPaneReady(ctx context.Context, target string, timeout time.Duration) error {
|
||
deadline := time.Now().Add(timeout)
|
||
for time.Now().Before(deadline) {
|
||
select {
|
||
case <-ctx.Done():
|
||
return ctx.Err()
|
||
default:
|
||
}
|
||
out, err := runTmuxOut(ctx, "capture-pane", "-t", target, "-p")
|
||
if err == nil && (strings.Contains(out, "❯") || strings.Contains(out, "│")) {
|
||
return nil
|
||
}
|
||
time.Sleep(500 * time.Millisecond)
|
||
}
|
||
return fmt.Errorf("pane %s not ready within %s", target, timeout)
|
||
}
|
||
|
||
func (s *LocalPaliadinService) sendToPane(ctx context.Context, target, msg string) error {
|
||
// `-l` sends the message literally (no key parsing) — necessary so
|
||
// our prompt's special characters don't get interpreted.
|
||
if err := runTmux(ctx, "send-keys", "-t", target, "-l", msg); err != nil {
|
||
return err
|
||
}
|
||
// Trailing Enter. tmux send-keys treats "Enter" as a special key name.
|
||
if err := runTmux(ctx, "send-keys", "-t", target, "Enter"); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// pollForResponse waits for the response file to materialise. Returns
|
||
// the file's content (and removes the file). Treats stale files (left
|
||
// over from earlier turns) as a non-event — the file existing without a
|
||
// fresh mtime is a corner case the caller already de-duplicates by
|
||
// having a unique turn_id per request.
|
||
func (s *LocalPaliadinService) pollForResponse(ctx context.Context, path string, timeout time.Duration) (string, error) {
|
||
deadline := time.Now().Add(timeout)
|
||
for time.Now().Before(deadline) {
|
||
select {
|
||
case <-ctx.Done():
|
||
return "", ctx.Err()
|
||
default:
|
||
}
|
||
data, err := os.ReadFile(path)
|
||
if err == nil && len(data) > 0 {
|
||
// Brief settle delay so we don't read mid-flush.
|
||
time.Sleep(50 * time.Millisecond)
|
||
data, _ = os.ReadFile(path)
|
||
_ = os.Remove(path)
|
||
return string(data), nil
|
||
}
|
||
time.Sleep(200 * time.Millisecond)
|
||
}
|
||
return "", fmt.Errorf("paliadin: response timeout after %s", timeout)
|
||
}
|
||
|
||
// =============================================================================
|
||
// shell / tmux helpers.
|
||
// =============================================================================
|
||
|
||
// runTmux executes `tmux <args...>` with a 5 s timeout layered on top of
// ctx. Standard output is discarded. A failing invocation is reported as
// an error that carries the argument list and whatever tmux wrote to
// stderr.
func runTmux(ctx context.Context, args ...string) error {
	bounded, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	var errOut bytes.Buffer
	tmux := exec.CommandContext(bounded, "tmux", args...)
	tmux.Stderr = &errOut

	err := tmux.Run()
	if err == nil {
		return nil
	}
	return fmt.Errorf("tmux %s: %w (stderr: %s)", strings.Join(args, " "), err, errOut.String())
}
|
||
|
||
// runTmuxOut executes `tmux <args...>` with a 5 s timeout layered on top
// of ctx and returns what the command printed on stdout. It is the
// variant to use for capture-pane / list-windows / show-window-option.
// On failure the returned string is empty and the error carries the
// argument list plus tmux's stderr.
func runTmuxOut(ctx context.Context, args ...string) (string, error) {
	bounded, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	var (
		out    bytes.Buffer
		errOut bytes.Buffer
	)
	tmux := exec.CommandContext(bounded, "tmux", args...)
	tmux.Stdout = &out
	tmux.Stderr = &errOut

	err := tmux.Run()
	if err == nil {
		return out.String(), nil
	}
	return "", fmt.Errorf("tmux %s: %w (stderr: %s)", strings.Join(args, " "), err, errOut.String())
}
|
||
|
||
// sanitiseForTmux prepares a user message for `tmux send-keys -l`.
//
// Carriage returns and newlines are each replaced by a single space:
// although -l sends literally, a stray line break inside the message
// would split it across multiple "send" actions and break the turn
// envelope. The result is then capped at 8000 bytes — a runaway prompt
// is a footgun — and " […truncated]" is appended when the cap applies.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved
// back to the start of the rune that straddles the limit, so valid input
// always yields valid output.
func sanitiseForTmux(s string) string {
	s = strings.ReplaceAll(s, "\r", " ")
	s = strings.ReplaceAll(s, "\n", " ")

	const maxLen = 8000
	if len(s) > maxLen {
		cut := maxLen
		// A UTF-8 continuation byte has the bit pattern 10xxxxxx. Step
		// back over at most three of them (a rune is at most four bytes)
		// so the slice ends on a rune boundary; the bound keeps malformed
		// input from dragging the cut arbitrarily far.
		for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		s = s[:cut] + " […truncated]"
	}
	return s
}
|
||
|
||
// =============================================================================
|
||
// trailer parsing.
|
||
// =============================================================================
|
||
|
||
// trailerMeta holds the fields pulled out of the
// [paliadin-meta]…[/paliadin-meta] block at the end of Claude's
// response. Extraction is best-effort: a field that is missing from the
// block keeps its zero value.
type trailerMeta struct {
	UsedTools     []string // values of the used_tools line
	RowsSeen      []int    // values of the rows_seen line
	ClassifierTag string   // value of the classifier_tag line
}
|
||
|
||
// trailerRE locates the meta trailer at the very end of a response: any
// number of newlines, a `---` rule, the `[paliadin-meta]` opening line,
// the trailer content (capture group 1, matched lazily), and the
// `[/paliadin-meta]` closing line followed only by whitespace. The (?s)
// flag lets `.` match newlines so the content may span several lines.
var trailerRE = regexp.MustCompile(`(?s)\n*---\s*\n+\[paliadin-meta\]\s*\n(.+?)\n\[/paliadin-meta\]\s*$`)
|
||
|
||
// splitTrailer separates the meta block from the body. If no trailer is
|
||
// present, the entire input is returned as the body.
|
||
func splitTrailer(body string) (string, trailerMeta) {
|
||
body = strings.TrimRight(body, " \t\n\r")
|
||
m := trailerRE.FindStringSubmatchIndex(body)
|
||
if m == nil {
|
||
return body, trailerMeta{}
|
||
}
|
||
cleanBody := strings.TrimRight(body[:m[0]], " \t\n\r")
|
||
metaText := body[m[2]:m[3]]
|
||
return cleanBody, parseTrailer(metaText)
|
||
}
|
||
|
||
func parseTrailer(text string) trailerMeta {
|
||
out := trailerMeta{}
|
||
for _, line := range strings.Split(text, "\n") {
|
||
k, v, ok := splitFirst(strings.TrimSpace(line), ":")
|
||
if !ok {
|
||
continue
|
||
}
|
||
v = strings.TrimSpace(v)
|
||
switch strings.ToLower(strings.TrimSpace(k)) {
|
||
case "used_tools":
|
||
for _, t := range strings.Split(v, ",") {
|
||
t = strings.TrimSpace(t)
|
||
if t != "" {
|
||
out.UsedTools = append(out.UsedTools, t)
|
||
}
|
||
}
|
||
case "rows_seen":
|
||
for _, t := range strings.Split(v, ",") {
|
||
n, err := strconv.Atoi(strings.TrimSpace(t))
|
||
if err == nil {
|
||
out.RowsSeen = append(out.RowsSeen, n)
|
||
}
|
||
}
|
||
case "classifier_tag":
|
||
out.ClassifierTag = v
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
// splitFirst divides s around the first occurrence of sep and returns the
// text before it, the text after it, and true. When sep does not occur
// in s it returns two empty strings and false.
func splitFirst(s, sep string) (string, string, bool) {
	if at := strings.Index(s, sep); at >= 0 {
		return s[:at], s[at+len(sep):], true
	}
	return "", "", false
}
|
||
|
||
// approxTokenCount estimates a token count as the number of
// whitespace-separated words × 1.3, truncated to an int. Claude Code
// does not expose real token counts through tmux, so this heuristic only
// feeds the dashboard's sense of cost trends.
func approxTokenCount(s string) int {
	wordCount := len(strings.Fields(s))
	if wordCount == 0 {
		return 0
	}
	return int(float64(wordCount) * 1.3)
}
|
||
|
||
// chipRE recognises the chip markers the system prompt asks Claude to
// embed: `[#deadline-OPEN:…]`, `[#projekt-OPEN:…]` and `[chip:…]`.
var chipRE = regexp.MustCompile(`\[(?:#[a-z]+-OPEN:[A-Za-z0-9\-_]+|chip:[a-z]+:[^\]]+)\]`)

// countChips returns how many chip markers s contains. The PoC frontend
// renders the markers as buttons; the audit log only needs their number.
func countChips(s string) int {
	hits := chipRE.FindAllStringIndex(s, -1)
	return len(hits)
}
|
||
|
||
// =============================================================================
|
||
// audit-row writers.
|
||
// =============================================================================
|
||
|
||
func (s *paliadinDB) insertTurnRow(ctx context.Context, t *PaliadinTurn) error {
|
||
q := `
|
||
INSERT INTO paliad.paliadin_turns (
|
||
turn_id, user_id, session_id, started_at, user_message, page_origin
|
||
) VALUES ($1, $2, $3, $4, $5, $6)
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q,
|
||
t.TurnID, t.UserID, t.SessionID, t.StartedAt, t.UserMessage, t.PageOrigin)
|
||
return err
|
||
}
|
||
|
||
func (s *paliadinDB) completeTurn(ctx context.Context, turnID uuid.UUID,
|
||
finishedAt time.Time, durationMS int, response string, tokens int,
|
||
meta trailerMeta, chipCount int) error {
|
||
rowsSeen := make(pq.Int64Array, 0, len(meta.RowsSeen))
|
||
for _, n := range meta.RowsSeen {
|
||
rowsSeen = append(rowsSeen, int64(n))
|
||
}
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2,
|
||
duration_ms = $3,
|
||
response = $4,
|
||
response_tokens = $5,
|
||
used_tools = $6,
|
||
rows_seen = $7,
|
||
chip_count = $8,
|
||
classifier_tag = $9
|
||
WHERE turn_id = $1
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q,
|
||
turnID, finishedAt, durationMS, response, tokens,
|
||
pq.StringArray(meta.UsedTools), rowsSeen, chipCount,
|
||
optionalString(meta.ClassifierTag))
|
||
return err
|
||
}
|
||
|
||
func (s *paliadinDB) markTurnError(ctx context.Context, turnID uuid.UUID, code string) error {
|
||
finished := time.Now().UTC()
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2, error_code = $3
|
||
WHERE turn_id = $1 AND finished_at IS NULL
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q, turnID, finished, code)
|
||
return err
|
||
}
|
||
|
||
func (s *paliadinDB) markTurnAbandonedOrError(ctx context.Context, turnID uuid.UUID, code string, abandoned bool) error {
|
||
finished := time.Now().UTC()
|
||
q := `
|
||
UPDATE paliad.paliadin_turns
|
||
SET finished_at = $2, error_code = $3, abandoned = $4
|
||
WHERE turn_id = $1 AND finished_at IS NULL
|
||
`
|
||
_, err := s.db.ExecContext(ctx, q, turnID, finished, code, abandoned)
|
||
return err
|
||
}
|
||
|
||
// optionalString converts s into a pointer suitable for a nullable
// column: the empty string becomes nil, anything else a pointer to a
// copy of s.
func optionalString(s string) *string {
	if s != "" {
		return &s
	}
	return nil
}
|
||
|
||
// Compile-time guard: the build breaks here if sql.ErrNoRows ever stops
// being assignable to error.
var _ error = sql.ErrNoRows
|