Files
paliad/scripts/paliadin-shim
m 3e1f4eee4b fix(t-paliad-155): cold-start timeout headroom + ban DB fallbacks in skill
Shim's run-turn hard timeout: 60s → 120s (PALIADIN_TIMEOUT_S default).
First turn after a fresh tmux session stacks claude boot + skill load
+ MCP discovery + first reasoning, which can blow past 60s before the
response file lands.

Aligned the surrounding timeouts so 120s is actually reachable:
- callShim ctx (paliadin_remote.go): 70s → 130s (shim 120 + 10 SSH).
- runPaliadinTurnAsync handler ctx: 120s → 150s (shim 120 + 10 SSH +
  20 paliad-side overhead).

SKILL.md hard rule #6 added: never fall back to psql / curl PostgREST /
nix-shell — mcp__supabase__execute_sql is the only DB tool. If it's
unavailable, write a short 'DB nicht erreichbar — bitte paliad neu
deployen oder PALIADIN_REMOTE_CWD prüfen' response immediately with
classifier_tag=meta. This avoids the 60s fallback-dance failure mode that
m hit on the turn where the cwd was misconfigured.
2026-05-08 13:19:27 +02:00

221 lines
7.8 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# paliadin-shim — server-side RPC for paliad's remote-tmux turns.
#
# Invoked via mRiver's ~/.ssh/authorized_keys command= restriction. The
# client's requested command is exposed in $SSH_ORIGINAL_COMMAND; this
# script parses it and dispatches to a fixed verb set.
#
# Design: docs/design-paliadin-tailscale-ssh-2026-05-07.md §5.4 +
# t-paliad-155 (per-user session keying + skill-based persona).
#
# Verbs (every verb takes the tmux session name as the first positional
# argument; per-user sessions are created on demand):
#
# health <session> -> "ok" iff tmux + claude reachable
# run-turn <session> <uuid> <msg-base64> -> send framed prompt, poll, return
# reset <session> -> kill the session entirely
#
# The persona + response protocol live in the Paliadin skill at
# ~/.claude/skills/paliadin/SKILL.md (see scripts/skills/paliadin/SKILL.md
# in the repo). Claude's skill router auto-matches the [PALIADIN:<uuid>]
# envelope and writes the response to /tmp/paliadin/<uuid>.txt — that is
# the contract this shim polls on. There is no longer a bootstrap step.
#
# All multi-character payloads (messages) are base64-encoded by the Go
# caller so we never have to quote them through ssh's argv.
#
# Errors go to stderr with a non-zero exit. The Go side maps the exit
# status into a friendly error code.
set -euo pipefail
umask 077

# Directory the shim polls for <uuid>.txt response files. Override with
# PALIADIN_RESPONSE_DIR.
readonly RESPONSE_DIR="${PALIADIN_RESPONSE_DIR:-/tmp/paliadin}"

# Hard cap, in whole seconds, on how long run-turn waits for a response
# file. Override with PALIADIN_TIMEOUT_S.
#
# The value is used in $(( ... )) arithmetic later on, so it is validated
# here, before any verb runs: a value such as "2m" or "090" would
# otherwise only fail with an arithmetic error after the prompt had
# already been sent to the pane. 10# forces base 10 so a leading zero is
# not read as octal.
timeout_raw="${PALIADIN_TIMEOUT_S:-120}"
if [[ ! "$timeout_raw" =~ ^[0-9]{1,9}$ ]]; then
  printf 'paliadin-shim: %s\n' \
    "PALIADIN_TIMEOUT_S must be a whole number of seconds (got '$timeout_raw')" >&2
  # 3 is the status this script already uses for host misconfiguration
  # (missing claude CLI / missing cwd).
  exit 3
fi
readonly TIMEOUT_S=$(( 10#$timeout_raw ))
unset timeout_raw

# Working directory for the claude pane. Must be the paliad repo root so
# claude picks up .mcp.json (project-scoped Supabase MCP) — without it,
# the SKILL.md SQL recipes fail with no DB tool. Override via env var if
# the repo lives elsewhere on this host.
readonly CLAUDE_CWD="${PALIADIN_REMOTE_CWD:-/home/m/dev/paliad}"
readonly PANE_READY_S=60 # max wait for claude pane to settle
readonly TURN_ID_RE='^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'

# Session names are constructed by the Go side as `paliad-paliadin-<userid8>`;
# allow the same shape m might dial by hand. Stays defensive against shell
# metacharacters since this string is interpolated into tmux targets.
# NOTE(review): '.' is accepted here but is a separator in tmux target
# syntax — confirm dotted names behave as intended with the installed tmux.
readonly SESSION_RE='^[A-Za-z0-9_.-]{1,64}$'

# Private (0700) response directory; umask 077 above keeps files created
# by this script owner-only as well.
mkdir -p "$RESPONSE_DIR"
chmod 700 "$RESPONSE_DIR"

# Parse $SSH_ORIGINAL_COMMAND into argv. Format: "<verb> <arg1> <arg2> …".
# We never `eval` this; `read -r -a` splits on $IFS without word-expansion.
read -r -a argv <<< "${SSH_ORIGINAL_COMMAND:-}"
verb="${argv[0]:-}"
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
# log_err writes one diagnostic line to stderr: the "paliadin-shim: "
# prefix followed by all arguments joined into a single string.
log_err() {
  local message="$*"
  printf 'paliadin-shim: %s\n' "$message" 1>&2
}
# require_session checks that argv[1] is present and matches SESSION_RE.
# On success the name is written to stdout without a trailing newline;
# otherwise a message naming the current verb is logged and the shell
# exits with status 2.
require_session() {
  local name="${argv[1]:-}"

  [[ -n "$name" ]] || {
    log_err "$verb: missing session name"
    exit 2
  }
  [[ "$name" =~ $SESSION_RE ]] || {
    log_err "$verb: invalid session name"
    exit 2
  }

  printf '%s' "$name"
}
# ensure_pane makes sure the named tmux session exists and contains a
# window tagged @paliadin-scope=chat, creating either when missing, and
# prints the window target ("session:window-idx", no trailing newline)
# on stdout.
#
# Per-user sessions are independently namespaced inside tmux; multiple
# paliad-paliadin-* sessions can coexist on mRiver without interfering.
#
# Arguments: $1 - tmux session name (validated by the caller)
# Globals:   CLAUDE_CWD, PANE_READY_S (read)
# Exits:     1 if tmux cannot create the session or the claude window
#            3 if a window must be created but the claude CLI is not in
#              PATH or CLAUDE_CWD is not a directory
#
# Callers run this inside $(...). Bash clears errexit in command
# substitution subshells unless inherit_errexit is set, so the file-level
# `set -e` does not cover the tmux calls below; the ones whose failure
# would produce a bogus target are checked explicitly. Without that, a
# failed new-window would print "session:" and the caller would type the
# user's message into whatever window the session currently has active.
ensure_pane() {
  local session="$1"

  # The "=" prefix makes tmux accept only an exact session-name match.
  # A bare name also matches by prefix, which could resolve to another
  # user's session and skip creating ours.
  if ! tmux has-session -t "=$session" 2>/dev/null; then
    if ! tmux new-session -d -s "$session"; then
      log_err "failed to create tmux session $session"
      exit 1
    fi
  fi

  # Look for an existing window tagged with @paliadin-scope=chat.
  local target=""
  local idx scope
  while read -r idx; do
    [[ -z "$idx" ]] && continue
    # A window without the option makes tmux fail; treat that as "untagged".
    scope=$(tmux show-window-option -t "$session:$idx" -v @paliadin-scope 2>/dev/null || true)
    if [[ "$scope" == "chat" ]]; then
      target="$session:$idx"
      break
    fi
  done < <(tmux list-windows -t "$session" -F '#{window_index}' 2>/dev/null || true)

  if [[ -z "$target" ]]; then
    if ! command -v claude >/dev/null 2>&1; then
      log_err "claude CLI not found in PATH"
      exit 3
    fi
    if [[ ! -d "$CLAUDE_CWD" ]]; then
      log_err "claude cwd $CLAUDE_CWD does not exist — set PALIADIN_REMOTE_CWD"
      exit 3
    fi

    # -P -F prints the new window's index so it can be addressed directly.
    if ! idx=$(tmux new-window -c "$CLAUDE_CWD" -t "$session" -n claude-paliadin -P -F '#{window_index}' claude) \
      || [[ -z "$idx" ]]; then
      log_err "failed to start claude window in tmux session $session"
      exit 1
    fi
    target="$session:$idx"

    # Wait for claude to settle. Matches Go waitForPaneReady (paliadin.go).
    # Declaration and assignment are split so `local` cannot mask a
    # failing command substitution.
    local deadline pane=""
    deadline=$(( $(date +%s) + PANE_READY_S ))
    while [[ $(date +%s) -lt $deadline ]]; do
      pane=$(tmux capture-pane -t "$target" -p 2>/dev/null || true)
      # NOTE(review): the first pattern below is *""*, which matches any
      # string, so this loop leaves on its first iteration and never
      # actually waits. A readiness glyph appears to have been lost from
      # between the quotes — restore it from the Go side's
      # waitForPaneReady; it is not recoverable from this file.
      if [[ "$pane" == *""* || "$pane" == *"│"* ]]; then
        break
      fi
      sleep 0.5
    done

    # Tag the window so later calls find and reuse it. A failure here is
    # not fatal for this turn, but the next call would open another
    # window, so say so.
    tmux set-window-option -t "$target" @paliadin-scope chat >/dev/null \
      || log_err "warning: could not tag $target with @paliadin-scope"
    tmux set-window-option -t "$target" @fix-name claude-paliadin >/dev/null
  fi

  printf '%s' "$target"
}
# send_to_pane types text into a tmux target and then presses Enter.
#
# Arguments: $1 - tmux target to send keys to
#            $2 - text to send; -l makes tmux treat it as literal
#                 characters rather than key names, and -- stops option
#                 parsing so text starting with "-" is safe
send_to_pane() {
  local pane_target="$1"
  local text="$2"
  local -a type_text=(send-keys -t "$pane_target" -l -- "$text")
  local -a press_enter=(send-keys -t "$pane_target" Enter)

  tmux "${type_text[@]}"
  tmux "${press_enter[@]}"
}
# ---------------------------------------------------------------------------
# verb dispatch
# ---------------------------------------------------------------------------
# Dispatch on the verb parsed from $SSH_ORIGINAL_COMMAND.
#
# Exit statuses used below: 0 success; 1 tooling failure (tmux/claude
# missing, pane target unusable); 2 bad or missing arguments / unknown
# verb; 124 no response file within TIMEOUT_S. ensure_pane may also end
# run-turn with its own statuses.
#
# tmux session targets are written as "=$session": the "=" prefix makes
# tmux accept only an exact name match. A bare name also matches by
# prefix, so e.g. `reset` on a short name could otherwise kill a
# different user's longer-named session.
case "$verb" in
  health)
    # Used by the Go side's healthGate to short-circuit when mRiver is
    # offline or tmux/claude is broken. Output is parsed verbatim.
    # Session is required (per-user). health does not start the claude
    # pane: it validates tooling and creates the bare tmux session if it
    # does not exist yet.
    session=$(require_session)
    if ! command -v tmux >/dev/null 2>&1; then
      log_err "tmux not in PATH"; exit 1
    fi
    if ! command -v claude >/dev/null 2>&1; then
      log_err "claude not in PATH"; exit 1
    fi
    if ! tmux has-session -t "=$session" 2>/dev/null; then
      tmux new-session -d -s "$session"
    fi
    echo ok
    ;;
  run-turn)
    # argv[1] = session, argv[2] = turn_id (UUID), argv[3] = base64-encoded
    # user message.
    session=$(require_session)
    turn_id="${argv[2]:-}"
    if [[ ! "$turn_id" =~ $TURN_ID_RE ]]; then
      log_err "run-turn: bad turn_id"; exit 2
    fi
    if [[ -z "${argv[3]:-}" ]]; then
      log_err "run-turn: missing message"; exit 2
    fi
    if ! msg=$(printf '%s' "${argv[3]}" | base64 -d 2>/dev/null); then
      log_err "run-turn: invalid base64 message"; exit 2
    fi
    target=$(ensure_pane "$session")
    # ensure_pane prints "session:window-idx". Refuse anything that does
    # not end in a window index: a target like "session:" would make tmux
    # pick the session's current window, and the user's message would be
    # typed into whatever runs there instead of the claude pane.
    if [[ ! "$target" =~ :[0-9]+$ ]]; then
      log_err "run-turn: could not resolve claude pane"; exit 1
    fi
    out="$RESPONSE_DIR/$turn_id.txt"
    # Drop any stale file for this turn id so the poll below only ever
    # sees the response to this prompt.
    rm -f "$out"
    # Envelope. The Paliadin skill (~/.claude/skills/paliadin/SKILL.md)
    # description-matches on this exact prefix, so Claude routes to the
    # skill on every turn regardless of conversation state — surviving
    # /clear, fresh sessions, and pane restarts.
    send_to_pane "$target" "[PALIADIN:$turn_id] $msg"
    # Poll for the response file. Same shape as Go pollForResponse
    # (paliadin.go). Settle delay so we don't read mid-flush.
    deadline=$(( $(date +%s) + TIMEOUT_S ))
    while [[ $(date +%s) -lt $deadline ]]; do
      # -s: the file exists and is non-empty.
      if [[ -s "$out" ]]; then
        sleep 0.05
        cat "$out"
        rm -f "$out"
        exit 0
      fi
      sleep 0.2
    done
    log_err "response timeout after ${TIMEOUT_S}s"
    exit 124
    ;;
  reset)
    # Kill the user's session entirely so the next run-turn boots a
    # fresh claude pane. With skill-based persona load, /clear would
    # also work — but kill-session is simpler and removes any chance
    # of leftover conversation state confusing the next turn.
    # Resetting a session that does not exist is a no-op and still
    # answers "ok".
    session=$(require_session)
    if tmux has-session -t "=$session" 2>/dev/null; then
      tmux kill-session -t "=$session"
    fi
    echo ok
    ;;
  '')
    log_err "no verb (set SSH_ORIGINAL_COMMAND via authorized_keys command=)"
    exit 2
    ;;
  *)
    log_err "unknown verb '$verb'"
    exit 2
    ;;
esac