Compare commits
5 Commits
mai/bohr/i
...
mai/hermes
| Author | SHA1 | Date | |
|---|---|---|---|
| 2a8bd4313b | |||
| 4183d4c55a | |||
| 127bbf3ed5 | |||
| a24ac2826f | |||
| 20490913c1 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,3 +7,4 @@
|
||||
.env.local
|
||||
/imagen
|
||||
/coverage.txt
|
||||
/.m/
|
||||
|
||||
@@ -11,21 +11,24 @@ import (
|
||||
"mgit.msbls.de/m/ImaGen/internal/backend"
|
||||
"mgit.msbls.de/m/ImaGen/internal/config"
|
||||
"mgit.msbls.de/m/ImaGen/internal/output"
|
||||
"mgit.msbls.de/m/ImaGen/internal/preview"
|
||||
"mgit.msbls.de/m/ImaGen/internal/prompt"
|
||||
)
|
||||
|
||||
func runGenerate(ctx context.Context, args []string) error {
|
||||
fs := flag.NewFlagSet("generate", flag.ContinueOnError)
|
||||
var (
|
||||
backendName string
|
||||
size string
|
||||
outPath string
|
||||
seed int64
|
||||
steps int
|
||||
style string
|
||||
negative string
|
||||
configPath string
|
||||
noSidecar bool
|
||||
backendName string
|
||||
size string
|
||||
outPath string
|
||||
seed int64
|
||||
steps int
|
||||
style string
|
||||
negative string
|
||||
configPath string
|
||||
noSidecar bool
|
||||
previewOn bool
|
||||
previewOff bool
|
||||
)
|
||||
fs.StringVar(&backendName, "backend", "", "backend instance name (default: config.default_backend)")
|
||||
fs.StringVar(&size, "size", "1024x1024", "WxH, e.g. 1024x1024")
|
||||
@@ -36,6 +39,8 @@ func runGenerate(ctx context.Context, args []string) error {
|
||||
fs.StringVar(&negative, "negative", "", "negative prompt (ignored by backends that don't support it)")
|
||||
fs.StringVar(&configPath, "config", "", "config file path (default: ~/.config/imagen.yaml)")
|
||||
fs.BoolVar(&noSidecar, "no-sidecar", false, "skip the JSON sidecar even if config enables it")
|
||||
fs.BoolVar(&previewOn, "preview", false, "force tmux preview window on (errors outside $TMUX)")
|
||||
fs.BoolVar(&previewOff, "no-preview", false, "skip the tmux preview window")
|
||||
fs.Usage = func() {
|
||||
fmt.Fprintln(fs.Output(), `Usage: imagen generate "<prompt>" [flags]`)
|
||||
fs.PrintDefaults()
|
||||
@@ -118,9 +123,70 @@ func runGenerate(ctx context.Context, args []string) error {
|
||||
if paths.SidecarPath != "" {
|
||||
fmt.Fprintln(os.Stderr, "sidecar:", paths.SidecarPath)
|
||||
}
|
||||
|
||||
if err := maybePreview(cfg, previewOn, previewOff, paths.ImagePath, rawPrompt); err != nil {
|
||||
// preview failures are warnings — the image already wrote.
|
||||
fmt.Fprintln(os.Stderr, "imagen: preview:", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolvePreviewMode applies the precedence chain config -> env -> flag.
|
||||
// Flags win, env beats config, config beats the implicit auto default.
|
||||
func resolvePreviewMode(cfg *config.Config, flagOn, flagOff bool, env string) (preview.Mode, error) {
|
||||
mode := preview.ModeAuto
|
||||
if cfg != nil && cfg.Output.Preview != "" {
|
||||
m, err := preview.ParseMode(cfg.Output.Preview)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("config output.preview: %w", err)
|
||||
}
|
||||
mode = m
|
||||
}
|
||||
if env != "" {
|
||||
m, err := preview.ParseMode(env)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("$IMAGEN_PREVIEW: %w", err)
|
||||
}
|
||||
mode = m
|
||||
}
|
||||
if flagOn && flagOff {
|
||||
return "", userErr("--preview and --no-preview are mutually exclusive")
|
||||
}
|
||||
if flagOn {
|
||||
mode = preview.ModeOn
|
||||
}
|
||||
if flagOff {
|
||||
mode = preview.ModeOff
|
||||
}
|
||||
return mode, nil
|
||||
}
|
||||
|
||||
// maybePreview resolves the effective preview mode and, if it says yes,
|
||||
// spawns a tmux window via tmux-img. Always non-fatal.
|
||||
func maybePreview(cfg *config.Config, flagOn, flagOff bool, imagePath, rawPrompt string) error {
|
||||
mode, err := resolvePreviewMode(cfg, flagOn, flagOff, os.Getenv("IMAGEN_PREVIEW"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
decision, err := preview.Resolve(mode, os.Getenv("TMUX") != "", stdoutIsTTY())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !decision.ShouldPreview {
|
||||
return nil
|
||||
}
|
||||
spawner := &preview.Spawner{}
|
||||
return spawner.Spawn(imagePath, output.Slug(rawPrompt))
|
||||
}
|
||||
|
||||
// stdoutIsTTY reports whether os.Stdout is a character device. If Stat on
// stdout fails the answer is false.
func stdoutIsTTY() bool {
	if info, err := os.Stdout.Stat(); err == nil {
		return info.Mode()&os.ModeCharDevice != 0
	}
	return false
}
|
||||
|
||||
// splitLeadingPositional separates the positional args at the start of args
|
||||
// from the rest (which begins with the first flag). A literal "--" terminator
|
||||
// pushes everything after it into the positional list and out of flag parsing.
|
||||
|
||||
50
cmd/imagen/generate_test.go
Normal file
50
cmd/imagen/generate_test.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"mgit.msbls.de/m/ImaGen/internal/config"
|
||||
"mgit.msbls.de/m/ImaGen/internal/preview"
|
||||
)
|
||||
|
||||
func TestResolvePreviewMode(t *testing.T) {
|
||||
type tc struct {
|
||||
name string
|
||||
cfg *config.Config
|
||||
flagOn bool
|
||||
flagOff bool
|
||||
env string
|
||||
want preview.Mode
|
||||
wantError bool
|
||||
}
|
||||
cases := []tc{
|
||||
{name: "all-empty-defaults-to-auto", want: preview.ModeAuto},
|
||||
{name: "config-on", cfg: &config.Config{Output: config.OutputConfig{Preview: "on"}}, want: preview.ModeOn},
|
||||
{name: "config-off", cfg: &config.Config{Output: config.OutputConfig{Preview: "off"}}, want: preview.ModeOff},
|
||||
{name: "config-auto-explicit", cfg: &config.Config{Output: config.OutputConfig{Preview: "auto"}}, want: preview.ModeAuto},
|
||||
{name: "env-overrides-config", cfg: &config.Config{Output: config.OutputConfig{Preview: "on"}}, env: "off", want: preview.ModeOff},
|
||||
{name: "flag-on-overrides-env-off", env: "off", flagOn: true, want: preview.ModeOn},
|
||||
{name: "flag-off-overrides-env-on", env: "on", flagOff: true, want: preview.ModeOff},
|
||||
{name: "flag-off-overrides-config-on", cfg: &config.Config{Output: config.OutputConfig{Preview: "on"}}, flagOff: true, want: preview.ModeOff},
|
||||
{name: "both-flags-error", flagOn: true, flagOff: true, wantError: true},
|
||||
{name: "bad-env-errors", env: "yes", wantError: true},
|
||||
{name: "bad-config-errors", cfg: &config.Config{Output: config.OutputConfig{Preview: "yes"}}, wantError: true},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got, err := resolvePreviewMode(c.cfg, c.flagOn, c.flagOff, c.env)
|
||||
if c.wantError {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got mode %q", got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got != c.want {
|
||||
t.Errorf("mode = %q, want %q", got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,7 @@ upstream API. Each adapter only ever sees its own slice of `imagen.yaml`.
|
||||
│ internal/prompt │ style preset → prompt suffix
|
||||
│ internal/output │ filename templating, sidecar
|
||||
│ internal/config │ YAML loader, validation
|
||||
│ internal/preview │ tmux-img window spawner
|
||||
└──────────┬────────────┘
|
||||
│
|
||||
┌──────────▼────────────┐
|
||||
|
||||
181
docs/setup-comfyui-mrock.md
Normal file
181
docs/setup-comfyui-mrock.md
Normal file
@@ -0,0 +1,181 @@
|
||||
# ComfyUI on mRock — install + ops
|
||||
|
||||
ImaGen's `flux-schnell-local` backend talks to ComfyUI on mRock at
|
||||
`http://mrock:8188` (Tailscale-internal). This document is the reproducible
|
||||
install path from a clean mRock state.
|
||||
|
||||
mRock runs Arch Linux + systemd with an NVIDIA RTX 4070 Ti SUPER (16 GB
|
||||
VRAM). Ollama is already a native systemd service, so ComfyUI follows the
|
||||
same pattern (native Python venv + systemd unit) instead of Docker — Docker
|
||||
on mRock has no `nvidia` runtime configured, and adding one is more invasive
|
||||
than another systemd unit.
|
||||
|
||||
## Prerequisites on mRock
|
||||
|
||||
- Python via `uv` (already installed).
|
||||
- NVIDIA driver new enough for CUDA 12.4. `nvidia-smi --query-gpu=driver_version`
|
||||
should show >= 550. Driver 595 is what mRock has today.
|
||||
- ~35 GB free on `/home` for the model files.
|
||||
- `ollama.service` running on port 11434 — coexistence notes below.
|
||||
|
||||
## 1. Clone ComfyUI + Python venv
|
||||
|
||||
```bash
|
||||
mkdir -p ~/dev && cd ~/dev
|
||||
git clone --depth 1 https://github.com/comfyanonymous/ComfyUI.git comfyui
|
||||
cd comfyui
|
||||
uv venv --python 3.12 .venv
|
||||
source .venv/bin/activate   # fish shell: source .venv/bin/activate.fish
|
||||
|
||||
# PyTorch CUDA 12.4 wheels — match the system driver
|
||||
uv pip install --no-cache torch torchvision torchaudio \
|
||||
--index-url https://download.pytorch.org/whl/cu124
|
||||
|
||||
uv pip install --no-cache -r requirements.txt
|
||||
```
|
||||
|
||||
Verify CUDA is wired up:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -c \
|
||||
"import torch; print(torch.__version__, torch.cuda.is_available(), torch.cuda.get_device_name(0))"
|
||||
# expected: 2.6.0+cu124 True NVIDIA GeForce RTX 4070 Ti SUPER
|
||||
```
|
||||
|
||||
## 2. Models — FLUX.1 schnell
|
||||
|
||||
The Black-Forest-Labs primary repo (`black-forest-labs/FLUX.1-schnell`) is
|
||||
**gated** — `curl` against it without an HF token returns HTTP 401. We pull
|
||||
the weights from ungated mirrors of the same Apache-2.0 release.
|
||||
|
||||
| File | Where it goes | Source |
|
||||
|------|---------------|--------|
|
||||
| `flux1-schnell.safetensors` (~23.8 GB, fp16) | `models/unet/` | `Comfy-Org/flux1-schnell` |
|
||||
| `ae.safetensors` (~335 MB) | `models/vae/` | `sirorable/flux-ae-vae` |
|
||||
| `clip_l.safetensors` (~246 MB) | `models/clip/` | `comfyanonymous/flux_text_encoders` |
|
||||
| `t5xxl_fp8_e4m3fn.safetensors` (~4.9 GB) | `models/clip/` | `comfyanonymous/flux_text_encoders` |
|
||||
|
||||
```bash
|
||||
cd ~/dev/comfyui/models
|
||||
|
||||
curl -L -o unet/flux1-schnell.safetensors \
|
||||
https://huggingface.co/Comfy-Org/flux1-schnell/resolve/main/flux1-schnell.safetensors
|
||||
curl -L -o vae/ae.safetensors \
|
||||
https://huggingface.co/sirorable/flux-ae-vae/resolve/main/ae.safetensors
|
||||
curl -L -o clip/clip_l.safetensors \
|
||||
https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors
|
||||
curl -L -o clip/t5xxl_fp8_e4m3fn.safetensors \
|
||||
https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors
|
||||
```
|
||||
|
||||
If a new HF token is configured later (`~/.cache/huggingface/token`), the
|
||||
official `black-forest-labs/FLUX.1-schnell` files are byte-identical and can be
|
||||
swapped in.
|
||||
|
||||
## 3. systemd unit
|
||||
|
||||
Drop `/etc/systemd/system/comfyui.service`:
|
||||
|
||||
```ini
|
||||
[Unit]
|
||||
Description=ComfyUI image generation server
|
||||
Documentation=https://github.com/comfyanonymous/ComfyUI
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=m
|
||||
Group=m
|
||||
WorkingDirectory=/home/m/dev/comfyui
|
||||
ExecStart=/home/m/dev/comfyui/.venv/bin/python /home/m/dev/comfyui/main.py \
|
||||
--listen 0.0.0.0 --port 8188 \
|
||||
--output-directory /home/m/dev/comfyui/output \
|
||||
--temp-directory /home/m/dev/comfyui/temp
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
TimeoutStopSec=30
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
LimitNOFILE=65535
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Then:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable --now comfyui.service
|
||||
systemctl status comfyui.service
|
||||
```
|
||||
|
||||
The service binds `0.0.0.0:8188`. Tailscale's wireguard fence is the only
|
||||
auth — do **not** expose port 8188 to the public internet.
|
||||
|
||||
## 4. Health check
|
||||
|
||||
```bash
|
||||
curl -fsS --max-time 5 http://mrock:8188/system_stats | jq '.devices[0]'
|
||||
# expected: name "cuda:0 NVIDIA GeForce RTX 4070 Ti SUPER ...", vram_total ~16 GB
|
||||
```
|
||||
|
||||
`imagen backends` (from a host with the ImaGen CLI installed) should also
|
||||
report `flux-schnell-local: ok`.
|
||||
|
||||
## 5. VRAM coexistence with Ollama
|
||||
|
||||
mRock has 16 GB VRAM total. Ollama parks ~8 GB resident for its current
|
||||
model. FLUX schnell at fp16 weights with `weight_dtype=fp8_e4m3fn` (the
|
||||
default the adapter requests) needs roughly 10–12 GB peak for a 1024×1024
|
||||
generation, so concurrent Ollama + FLUX on mRock will OOM.
|
||||
|
||||
Two practical options:
|
||||
|
||||
- **Stop Ollama before generating** — `sudo systemctl stop ollama` frees
|
||||
the GPU, run the generation, `sudo systemctl start ollama` afterwards.
|
||||
Adequate while we don't have many concurrent users.
|
||||
- **Move Ollama off mRock** — when ImaGen is in regular use, push Ollama to
|
||||
another host so the GPU is dedicated. Tracked separately.
|
||||
|
||||
Both decisions live with whoever operates the box; the adapter does not try
|
||||
to manage Ollama.
|
||||
|
||||
## 6. Smoke test (direct, without the imagen CLI)
|
||||
|
||||
```bash
|
||||
# 1) Submit a workflow
|
||||
curl -fsS --max-time 30 -X POST -H 'Content-Type: application/json' \
|
||||
-d @flux-schnell-workflow.json \
|
||||
http://mrock:8188/prompt
|
||||
# returns: {"prompt_id": "...", "number": ..., "node_errors": {}}
|
||||
|
||||
# 2) Poll history until the prompt completes
|
||||
PID=... # from above
|
||||
until curl -fsS http://mrock:8188/history/$PID | jq -e ".\"$PID\".status.completed == true" >/dev/null; do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
# 3) Pull the image
|
||||
NAME=$(curl -fsS http://mrock:8188/history/$PID \
|
||||
| jq -r ".\"$PID\".outputs[\"9\"].images[0].filename")
|
||||
curl -fsS "http://mrock:8188/view?filename=$NAME&type=output" -o /tmp/cat.png
|
||||
file /tmp/cat.png # PNG image data, 1024 x 1024
|
||||
```
|
||||
|
||||
The full ImaGen smoke test is in [usage.md](usage.md) once the Go adapter
|
||||
ships.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **`vram_free` < 6 GB in `/system_stats`**: another GPU process is holding
|
||||
memory. Usually Ollama (`sudo systemctl stop ollama`).
|
||||
- **Workflow returns `node_errors` with `Required input is missing` for
|
||||
DualCLIPLoader**: text encoder filenames don't match step 2 — check that
|
||||
`clip_l.safetensors` and `t5xxl_fp8_e4m3fn.safetensors` are in
|
||||
`models/clip/`, not `models/text_encoders/`.
|
||||
- **`Access to model … is restricted`** during a model pull: the download is
|
||||
hitting the gated upstream repo. Use the ungated mirror URLs from step 2.
|
||||
- **Service won't start**: check `journalctl -u comfyui --since '5 min ago'`.
|
||||
Common cause is a stale `pip` install — re-run step 1.
|
||||
@@ -24,8 +24,28 @@ imagen version print version
|
||||
| `--negative` | empty | Negative prompt (ignored by some adapters) |
|
||||
| `--output` | empty (= use naming template) | Explicit path |
|
||||
| `--no-sidecar` | `false` | Skip the JSON sidecar even if config enables it |
|
||||
| `--preview` | (auto) | Force open a tmux preview window via `tmux-img` |
|
||||
| `--no-preview` | (auto) | Suppress the preview window (use for batch / CI callers) |
|
||||
| `--config` | `~/.config/imagen.yaml` | Override config path |
|
||||
|
||||
### Preview window
|
||||
|
||||
After a successful generate, imagen optionally opens a sibling tmux window
|
||||
named `img:<slug>` running `tmux-img --hold <path>`. The new window is
|
||||
spawned in the background (`tmux new-window -d`) so the generating pane
|
||||
keeps focus and its terminal output.
|
||||
|
||||
Resolution order is **config → `$IMAGEN_PREVIEW` → flag** (later wins):
|
||||
|
||||
- `output.preview` in `imagen.yaml`: `auto` (default) | `on` | `off`
|
||||
- `IMAGEN_PREVIEW=auto|on|off` overrides config
|
||||
- `--preview` / `--no-preview` override env
|
||||
|
||||
`auto` previews iff stdout is a TTY *and* `$TMUX` is set. `on` always
|
||||
attempts a preview and errors outside a tmux session. `off` never previews.
|
||||
|
||||
Preview failures are non-fatal — the image already wrote.
|
||||
|
||||
## Examples
|
||||
|
||||
```sh
|
||||
|
||||
557
internal/backend/comfyui.go
Normal file
557
internal/backend/comfyui.go
Normal file
@@ -0,0 +1,557 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ComfyType is the type-name adapters register under for ComfyUI instances.
const ComfyType = "comfyui"

// Comfy is the backend adapter for a ComfyUI server. It drives the server
// through its HTTP endpoints (/prompt, /history, /view) and always submits
// the same FLUX.1 schnell workflow, filled in from the incoming Request.
//
// Concurrency: one Comfy may be shared between goroutines provided the
// http.Client it holds may be; Generate keeps no long-lived state.
type Comfy struct {
	// instance is the name reported by Name.
	instance string

	// base is the server base URL with trailing slashes removed; endpoint
	// paths such as "/prompt" are appended to it directly.
	base string

	// File names and settings written into the workflow's loader nodes.
	model  string // UNETLoader unet_name
	vae    string // VAELoader vae_name
	clipL  string // DualCLIPLoader clip_name2
	clipT5 string // DualCLIPLoader clip_name1
	dtype  string // UNETLoader weight_dtype

	// Fallbacks applied when a request does not pick its own value.
	defaultSteps     int
	defaultSampler   string
	defaultScheduler string

	httpClient   *http.Client
	pollInterval time.Duration // pause between two /history polls
	pollTimeout  time.Duration // how long one prompt may take before giving up

	// Test seams; the constructor wires in the package-level defaults.
	randSeed   func() int64  // consulted when the request seed is 0
	clientIDFn func() string // yields the client_id sent with /prompt
}
|
||||
|
||||
// NewComfy is the registry constructor. cfg is the adapter's slice of
|
||||
// imagen.yaml. Required keys: base_url, model. The rest have sensible FLUX
|
||||
// schnell defaults.
|
||||
func NewComfy(name string, cfg map[string]any) (Backend, error) {
|
||||
if name == "" {
|
||||
return nil, fmt.Errorf("comfyui: empty instance name")
|
||||
}
|
||||
base := strings.TrimRight(getString(cfg, "base_url", ""), "/")
|
||||
if base == "" {
|
||||
return nil, fmt.Errorf("comfyui[%s]: base_url is required", name)
|
||||
}
|
||||
if _, err := url.Parse(base); err != nil {
|
||||
return nil, fmt.Errorf("comfyui[%s]: base_url %q invalid: %w", name, base, err)
|
||||
}
|
||||
model := getString(cfg, "model", "")
|
||||
if model == "" {
|
||||
return nil, fmt.Errorf("comfyui[%s]: model is required", name)
|
||||
}
|
||||
|
||||
c := &Comfy{
|
||||
instance: name,
|
||||
base: base,
|
||||
model: model,
|
||||
|
||||
vae: getString(cfg, "vae", "ae.safetensors"),
|
||||
clipL: getString(cfg, "clip_l", "clip_l.safetensors"),
|
||||
clipT5: getString(cfg, "clip_t5", "t5xxl_fp8_e4m3fn.safetensors"),
|
||||
dtype: getString(cfg, "weight_dtype", "fp8_e4m3fn"),
|
||||
|
||||
defaultSteps: getInt(cfg, "default_steps", 4),
|
||||
defaultSampler: getString(cfg, "default_sampler", "euler"),
|
||||
defaultScheduler: getString(cfg, "default_scheduler", "simple"),
|
||||
|
||||
httpClient: &http.Client{Timeout: 60 * time.Second},
|
||||
pollInterval: 250 * time.Millisecond,
|
||||
pollTimeout: 120 * time.Second,
|
||||
|
||||
randSeed: cryptoSeed,
|
||||
clientIDFn: randClientID,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Name returns the instance name from imagen.yaml.
|
||||
func (c *Comfy) Name() string { return c.instance }
|
||||
|
||||
// Generate submits one workflow to ComfyUI, waits for it to render, and
|
||||
// returns the resulting PNG.
|
||||
func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) {
|
||||
width := orDefaultInt(req.Width, 1024)
|
||||
height := orDefaultInt(req.Height, 1024)
|
||||
steps := orDefaultInt(req.Steps, c.defaultSteps)
|
||||
|
||||
sampler := c.defaultSampler
|
||||
scheduler := c.defaultScheduler
|
||||
if v, ok := req.BackendOpts["sampler"].(string); ok && v != "" {
|
||||
sampler = v
|
||||
}
|
||||
if v, ok := req.BackendOpts["scheduler"].(string); ok && v != "" {
|
||||
scheduler = v
|
||||
}
|
||||
|
||||
seed := req.Seed
|
||||
if seed == 0 {
|
||||
seed = c.randSeed()
|
||||
}
|
||||
|
||||
workflow := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler)
|
||||
clientID := c.clientIDFn()
|
||||
|
||||
start := time.Now()
|
||||
promptID, err := c.submitPrompt(ctx, workflow, clientID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filename, err := c.waitForCompletion(ctx, promptID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
imgBytes, err := c.fetchImage(ctx, filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
latencyMs := time.Since(start).Milliseconds()
|
||||
|
||||
meta := map[string]any{
|
||||
"backend": c.instance,
|
||||
"backend_type": ComfyType,
|
||||
"model": c.model,
|
||||
"seed": seed,
|
||||
"steps": steps,
|
||||
"sampler": sampler,
|
||||
"scheduler": scheduler,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"latency_ms": latencyMs,
|
||||
"prompt_id": promptID,
|
||||
"client_id": clientID,
|
||||
}
|
||||
if vram := c.vramUsedMiB(ctx); vram > 0 {
|
||||
meta["vram_used_mib"] = vram
|
||||
}
|
||||
|
||||
return &Result{
|
||||
ImageReader: io.NopCloser(bytes.NewReader(imgBytes)),
|
||||
MimeType: "image/png",
|
||||
Metadata: meta,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// submitPrompt POSTs the workflow and extracts the prompt_id.
|
||||
//
|
||||
// Retries once on a 5xx or transient network error. 4xx responses are not
|
||||
// retried — they are treated as configuration bugs (missing model, bad
|
||||
// workflow shape, etc.) and surfaced with a hint pointing at the docs when
|
||||
// the body matches a known pattern.
|
||||
func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clientID string) (string, error) {
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"prompt": workflow,
|
||||
"client_id": clientID,
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("comfyui: marshal workflow: %w", err)
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
for attempt := range 2 {
|
||||
if attempt > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
case <-time.After(time.Second):
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.base+"/prompt", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
lastErr = c.connError(err)
|
||||
continue
|
||||
}
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
switch {
|
||||
case resp.StatusCode >= 200 && resp.StatusCode < 300:
|
||||
return parsePromptID(respBody, c.model)
|
||||
case resp.StatusCode >= 500:
|
||||
lastErr = fmt.Errorf("comfyui /prompt %d: %s", resp.StatusCode, snip(respBody))
|
||||
continue
|
||||
default:
|
||||
return "", c.classifyBadRequest(resp.StatusCode, respBody)
|
||||
}
|
||||
}
|
||||
return "", lastErr
|
||||
}
|
||||
|
||||
// waitForCompletion polls /history/{id} until the prompt finishes and
|
||||
// returns the filename of the produced image.
|
||||
func (c *Comfy) waitForCompletion(ctx context.Context, promptID string) (string, error) {
|
||||
deadline := time.Now().Add(c.pollTimeout)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
default:
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return "", fmt.Errorf("comfyui: prompt %s did not complete within %s", promptID, c.pollTimeout)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.base+"/history/"+promptID, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return "", c.connError(err)
|
||||
}
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("comfyui /history/%s %d: %s", promptID, resp.StatusCode, snip(body))
|
||||
}
|
||||
filename, done, err := parseHistory(body, promptID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if done {
|
||||
return filename, nil
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
case <-time.After(c.pollInterval):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fetchImage downloads the produced image bytes via /view.
|
||||
func (c *Comfy) fetchImage(ctx context.Context, filename string) ([]byte, error) {
|
||||
q := url.Values{
|
||||
"filename": {filename},
|
||||
"type": {"output"},
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.base+"/view?"+q.Encode(), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, c.connError(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("comfyui /view %d: %s", resp.StatusCode, snip(body))
|
||||
}
|
||||
return io.ReadAll(resp.Body)
|
||||
}
|
||||
|
||||
// vramUsedMiB returns total - free VRAM on device 0 from /system_stats, or
|
||||
// 0 if the endpoint isn't available. Best-effort, never an error.
|
||||
func (c *Comfy) vramUsedMiB(ctx context.Context) int64 {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.base+"/system_stats", nil)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return 0
|
||||
}
|
||||
var s struct {
|
||||
Devices []struct {
|
||||
VRAMTotal int64 `json:"vram_total"`
|
||||
VRAMFree int64 `json:"vram_free"`
|
||||
} `json:"devices"`
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&s); err != nil {
|
||||
return 0
|
||||
}
|
||||
if len(s.Devices) == 0 {
|
||||
return 0
|
||||
}
|
||||
used := s.Devices[0].VRAMTotal - s.Devices[0].VRAMFree
|
||||
if used < 0 {
|
||||
return 0
|
||||
}
|
||||
return used / (1024 * 1024)
|
||||
}
|
||||
|
||||
// connError translates a Go networking error into a user-actionable message,
|
||||
// pointing at the boot-whitetower script when mRock looks asleep.
|
||||
func (c *Comfy) connError(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return err
|
||||
}
|
||||
msg := err.Error()
|
||||
var opErr *net.OpError
|
||||
asOp := errors.As(err, &opErr)
|
||||
switch {
|
||||
case asOp,
|
||||
strings.Contains(msg, "connection refused"),
|
||||
strings.Contains(msg, "no such host"),
|
||||
strings.Contains(msg, "no route to host"),
|
||||
strings.Contains(msg, "network is unreachable"),
|
||||
strings.Contains(msg, "i/o timeout"):
|
||||
return fmt.Errorf("comfyui at %s unreachable (%v) — if mRock is asleep, run: boot-whitetower mrock", c.base, err)
|
||||
}
|
||||
return fmt.Errorf("comfyui at %s: %w", c.base, err)
|
||||
}
|
||||
|
||||
// classifyBadRequest interprets a 4xx body. Some ComfyUI builds use 400 for
|
||||
// workflow-validation failures and put the diagnostics in node_errors; older
|
||||
// builds use 200 + node_errors. This handles the 4xx flavour.
|
||||
func (c *Comfy) classifyBadRequest(status int, body []byte) error {
|
||||
if hint, ok := missingModelHint(body, c.model); ok {
|
||||
return fmt.Errorf("comfyui /prompt %d: %s — see docs/setup-comfyui-mrock.md", status, hint)
|
||||
}
|
||||
return fmt.Errorf("comfyui /prompt %d: %s", status, snip(body))
|
||||
}
|
||||
|
||||
// buildWorkflow assembles the canonical FLUX.1 schnell ComfyUI workflow,
|
||||
// node-IDs matching the upstream "flux-schnell" template so anyone debugging
|
||||
// in the ComfyUI UI sees a familiar shape.
|
||||
func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string) map[string]any {
|
||||
return map[string]any{
|
||||
"6": map[string]any{
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": map[string]any{
|
||||
"text": prompt,
|
||||
"clip": []any{"11", 0},
|
||||
},
|
||||
},
|
||||
"8": map[string]any{
|
||||
"class_type": "VAEDecode",
|
||||
"inputs": map[string]any{
|
||||
"samples": []any{"31", 0},
|
||||
"vae": []any{"10", 0},
|
||||
},
|
||||
},
|
||||
"9": map[string]any{
|
||||
"class_type": "SaveImage",
|
||||
"inputs": map[string]any{
|
||||
"filename_prefix": "imagen",
|
||||
"images": []any{"8", 0},
|
||||
},
|
||||
},
|
||||
"10": map[string]any{
|
||||
"class_type": "VAELoader",
|
||||
"inputs": map[string]any{"vae_name": c.vae},
|
||||
},
|
||||
"11": map[string]any{
|
||||
"class_type": "DualCLIPLoader",
|
||||
"inputs": map[string]any{
|
||||
"clip_name1": c.clipT5,
|
||||
"clip_name2": c.clipL,
|
||||
"type": "flux",
|
||||
},
|
||||
},
|
||||
"12": map[string]any{
|
||||
"class_type": "UNETLoader",
|
||||
"inputs": map[string]any{
|
||||
"unet_name": c.model,
|
||||
"weight_dtype": c.dtype,
|
||||
},
|
||||
},
|
||||
"13": map[string]any{
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": map[string]any{
|
||||
"text": negative,
|
||||
"clip": []any{"11", 0},
|
||||
},
|
||||
},
|
||||
"27": map[string]any{
|
||||
"class_type": "EmptySD3LatentImage",
|
||||
"inputs": map[string]any{
|
||||
"width": w,
|
||||
"height": h,
|
||||
"batch_size": 1,
|
||||
},
|
||||
},
|
||||
"30": map[string]any{
|
||||
"class_type": "ModelSamplingFlux",
|
||||
"inputs": map[string]any{
|
||||
"model": []any{"12", 0},
|
||||
"max_shift": 1.15,
|
||||
"base_shift": 0.5,
|
||||
"width": w,
|
||||
"height": h,
|
||||
},
|
||||
},
|
||||
"31": map[string]any{
|
||||
"class_type": "KSampler",
|
||||
"inputs": map[string]any{
|
||||
"model": []any{"30", 0},
|
||||
"seed": seed,
|
||||
"steps": steps,
|
||||
"cfg": 1.0,
|
||||
"sampler_name": sampler,
|
||||
"scheduler": scheduler,
|
||||
"denoise": 1.0,
|
||||
"positive": []any{"6", 0},
|
||||
"negative": []any{"13", 0},
|
||||
"latent_image": []any{"27", 0},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// parsePromptID handles the 2xx /prompt response. ComfyUI sometimes 200s a
|
||||
// validation failure and stuffs node_errors in the body — this function
|
||||
// turns that into the same user-facing error as a 4xx with the same body.
|
||||
func parsePromptID(body []byte, model string) (string, error) {
|
||||
var resp struct {
|
||||
PromptID string `json:"prompt_id"`
|
||||
NodeErrors map[string]any `json:"node_errors"`
|
||||
Error json.RawMessage `json:"error"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return "", fmt.Errorf("comfyui /prompt: parse response: %w (body: %s)", err, snip(body))
|
||||
}
|
||||
if len(resp.NodeErrors) > 0 || len(resp.Error) > 0 {
|
||||
if hint, ok := missingModelHint(body, model); ok {
|
||||
return "", fmt.Errorf("comfyui /prompt: %s — see docs/setup-comfyui-mrock.md", hint)
|
||||
}
|
||||
return "", fmt.Errorf("comfyui /prompt rejected workflow: %s", snip(body))
|
||||
}
|
||||
if resp.PromptID == "" {
|
||||
return "", fmt.Errorf("comfyui /prompt: empty prompt_id (body: %s)", snip(body))
|
||||
}
|
||||
return resp.PromptID, nil
|
||||
}
|
||||
|
||||
// parseHistory inspects a /history/{id} body and returns either the produced
|
||||
// filename + done=true, or done=false to signal "keep polling".
|
||||
func parseHistory(body []byte, promptID string) (string, bool, error) {
|
||||
var entries map[string]struct {
|
||||
Status struct {
|
||||
Completed bool `json:"completed"`
|
||||
StatusStr string `json:"status_str"`
|
||||
} `json:"status"`
|
||||
Outputs map[string]struct {
|
||||
Images []struct {
|
||||
Filename string `json:"filename"`
|
||||
Subfolder string `json:"subfolder"`
|
||||
Type string `json:"type"`
|
||||
} `json:"images"`
|
||||
} `json:"outputs"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &entries); err != nil {
|
||||
return "", false, fmt.Errorf("comfyui /history: parse: %w (body: %s)", err, snip(body))
|
||||
}
|
||||
e, ok := entries[promptID]
|
||||
if !ok {
|
||||
return "", false, nil
|
||||
}
|
||||
if e.Status.StatusStr == "error" {
|
||||
return "", false, fmt.Errorf("comfyui prompt %s errored: %s", promptID, snip(body))
|
||||
}
|
||||
if !e.Status.Completed {
|
||||
return "", false, nil
|
||||
}
|
||||
for _, out := range e.Outputs {
|
||||
if len(out.Images) > 0 {
|
||||
return out.Images[0].Filename, true, nil
|
||||
}
|
||||
}
|
||||
return "", true, fmt.Errorf("comfyui prompt %s completed but produced no images", promptID)
|
||||
}
|
||||
|
||||
// missingModelHint reports whether body looks like ComfyUI complaining that
// the configured unet model is not available, and if so returns a message the
// user can act on. The body must mention "unet_name" together with either the
// human-readable "Value not in list" text or the "value_not_in_list" enum.
func missingModelHint(body []byte, model string) (string, bool) {
	text := string(body)
	if !strings.Contains(text, "unet_name") {
		return "", false
	}
	for _, marker := range []string{"Value not in list", "value_not_in_list"} {
		if strings.Contains(text, marker) {
			return fmt.Sprintf("model %q not present in the ComfyUI server's models/unet/", model), true
		}
	}
	return "", false
}
|
||||
|
||||
// cryptoSeed returns a non-negative int64 seed built from 8 bytes obtained
// via rand.Read. The top bit is shifted away so the value always fits a
// positive int64. If rand.Read fails, the current wall-clock time in
// nanoseconds is used instead.
func cryptoSeed() int64 {
	var raw [8]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return time.Now().UnixNano()
	}
	bits := binary.BigEndian.Uint64(raw[:])
	return int64(bits >> 1)
}
|
||||
|
||||
// randClientID returns "imagen-" followed by 8 bytes from rand.Read rendered
// as lowercase hex. A rand.Read failure is deliberately ignored; whatever is
// in the buffer (zeroes on total failure) is used as-is.
func randClientID() string {
	var raw [8]byte
	_, _ = rand.Read(raw[:])
	return fmt.Sprintf("imagen-%x", raw[:])
}
|
||||
|
||||
// getString returns m[k] when it holds a non-empty string; in every other
// case (missing key, different type, empty string) it returns def.
func getString(m map[string]any, k, def string) string {
	s, isString := m[k].(string)
	if !isString || s == "" {
		return def
	}
	return s
}
|
||||
|
||||
// getInt returns m[k] as an int when the stored value is an int, int64 or
// float64 (floats are converted with int(), i.e. truncated toward zero). A
// missing key or a value of any other type yields def.
func getInt(m map[string]any, k string, def int) int {
	raw, present := m[k]
	if !present {
		return def
	}
	if n, ok := raw.(int); ok {
		return n
	}
	if n, ok := raw.(int64); ok {
		return int(n)
	}
	if n, ok := raw.(float64); ok {
		return int(n)
	}
	return def
}
|
||||
|
||||
// orDefaultInt returns v unless it is zero, in which case def is returned.
func orDefaultInt(v, def int) int {
	if v != 0 {
		return v
	}
	return def
}
|
||||
|
||||
// snip renders b as a whitespace-trimmed string for inclusion in error
// messages, capped at roughly 500 bytes. Longer input is cut and suffixed
// with "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte 500 is a
// continuation byte, the cut backs up (by at most three bytes, the maximum
// number of continuation bytes in one rune) to the start of that rune, so
// valid input never produces an invalid snippet.
func snip(b []byte) string {
	const limit = 500
	s := strings.TrimSpace(string(b))
	if len(s) <= limit {
		return s
	}
	cut := limit
	// 0b10xxxxxx marks a UTF-8 continuation byte; s[cut] is the first byte
	// that would be dropped, so it must be the start of a rune.
	for cut > limit-3 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
|
||||
|
||||
func init() {
|
||||
Register(ComfyType, NewComfy)
|
||||
}
|
||||
494
internal/backend/comfyui_test.go
Normal file
494
internal/backend/comfyui_test.go
Normal file
@@ -0,0 +1,494 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/png"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// fakeComfy is a scriptable stand-in for the ComfyUI HTTP API. A test tunes
// the fields below before sending requests to server.
type fakeComfy struct {
	t *testing.T

	// POST /prompt
	promptStatus     int          // status written for a non-failing call
	promptBody       []byte       // body written for a non-failing call
	promptCalls      atomic.Int32 // number of /prompt requests received
	failPromptUntil  int32        // first N /prompt calls return promptFailStatus
	promptFailStatus int
	promptFailBody   []byte

	// GET /history/{id}: answers {} (no entry) for the first
	// historyReadyAfter polls, then reports the prompt as completed.
	historyReadyAfter int32
	historyCalls      atomic.Int32
	historyError      bool // when set, every poll reports status_str "error"

	// GET /view
	viewStatus int
	viewBody   []byte
	viewType   string // Content-Type header; empty means image/png

	// GET /system_stats
	statsTotal int64 // reported as vram_total
	statsFree  int64 // reported as vram_free

	server *httptest.Server
}
|
||||
|
||||
func (f *fakeComfy) handler() http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/prompt" && r.Method == http.MethodPost:
|
||||
n := f.promptCalls.Add(1)
|
||||
if n <= int32(f.failPromptUntil) {
|
||||
w.WriteHeader(f.promptFailStatus)
|
||||
_, _ = w.Write(f.promptFailBody)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(f.promptStatus)
|
||||
_, _ = w.Write(f.promptBody)
|
||||
case strings.HasPrefix(r.URL.Path, "/history/") && r.Method == http.MethodGet:
|
||||
n := f.historyCalls.Add(1)
|
||||
id := strings.TrimPrefix(r.URL.Path, "/history/")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
if f.historyError {
|
||||
_, _ = fmt.Fprintf(w, `{"%s":{"status":{"completed":false,"status_str":"error"},"outputs":{}}}`, id)
|
||||
return
|
||||
}
|
||||
if n <= f.historyReadyAfter {
|
||||
_, _ = w.Write([]byte(`{}`))
|
||||
return
|
||||
}
|
||||
_, _ = fmt.Fprintf(w,
|
||||
`{"%s":{"status":{"completed":true,"status_str":"success"},"outputs":{"9":{"images":[{"filename":"imagen_00001_.png","subfolder":"","type":"output"}]}}}}`,
|
||||
id,
|
||||
)
|
||||
case r.URL.Path == "/view" && r.Method == http.MethodGet:
|
||||
ct := f.viewType
|
||||
if ct == "" {
|
||||
ct = "image/png"
|
||||
}
|
||||
w.Header().Set("Content-Type", ct)
|
||||
w.WriteHeader(f.viewStatus)
|
||||
_, _ = w.Write(f.viewBody)
|
||||
case r.URL.Path == "/system_stats" && r.Method == http.MethodGet:
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
body := map[string]any{
|
||||
"system": map[string]any{},
|
||||
"devices": []map[string]any{
|
||||
{"vram_total": f.statsTotal, "vram_free": f.statsFree},
|
||||
},
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(body)
|
||||
default:
|
||||
f.t.Errorf("fakeComfy: unexpected request %s %s", r.Method, r.URL.Path)
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (f *fakeComfy) start() {
|
||||
f.server = httptest.NewServer(f.handler())
|
||||
f.t.Cleanup(f.server.Close)
|
||||
}
|
||||
|
||||
// newFakeComfy spins up a fakeComfy with happy-path defaults.
|
||||
func newFakeComfy(t *testing.T) *fakeComfy {
|
||||
t.Helper()
|
||||
f := &fakeComfy{
|
||||
t: t,
|
||||
promptStatus: http.StatusOK,
|
||||
promptBody: []byte(`{"prompt_id":"pid-abc","number":1,"node_errors":{}}`),
|
||||
viewStatus: http.StatusOK,
|
||||
viewBody: mustPNG(t, 16, 16),
|
||||
statsTotal: 16 * 1024 * 1024 * 1024,
|
||||
statsFree: 8 * 1024 * 1024 * 1024,
|
||||
}
|
||||
f.start()
|
||||
return f
|
||||
}
|
||||
|
||||
// newComfy returns a Comfy pointed at f, with poll interval squashed for fast
|
||||
// tests and deterministic seed/client_id.
|
||||
func newComfy(t *testing.T, f *fakeComfy) *Comfy {
|
||||
t.Helper()
|
||||
be, err := NewComfy("flux-test", map[string]any{
|
||||
"base_url": f.server.URL,
|
||||
"model": "flux1-schnell.safetensors",
|
||||
"default_steps": 4,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewComfy: %v", err)
|
||||
}
|
||||
c := be.(*Comfy)
|
||||
c.pollInterval = time.Millisecond
|
||||
c.pollTimeout = 5 * time.Second
|
||||
c.randSeed = func() int64 { return 42 }
|
||||
c.clientIDFn = func() string { return "imagen-test" }
|
||||
return c
|
||||
}
|
||||
|
||||
// mustPNG returns the PNG encoding of a w×h image filled with one opaque
// colour (R=200, G=100, B=50). It fails the test if encoding errors.
func mustPNG(t *testing.T, w, h int) []byte {
	t.Helper()

	fill := color.RGBA{R: 200, G: 100, B: 50, A: 255}
	canvas := image.NewRGBA(image.Rect(0, 0, w, h))
	for y := 0; y < h; y++ {
		for x := 0; x < w; x++ {
			canvas.SetRGBA(x, y, fill)
		}
	}

	encoded := new(bytes.Buffer)
	if err := png.Encode(encoded, canvas); err != nil {
		t.Fatalf("encode png: %v", err)
	}
	return encoded.Bytes()
}
|
||||
|
||||
func TestComfyConstructorRequiresBaseAndModel(t *testing.T) {
|
||||
if _, err := NewComfy("x", map[string]any{}); err == nil {
|
||||
t.Errorf("expected error for missing base_url")
|
||||
}
|
||||
if _, err := NewComfy("x", map[string]any{"base_url": "http://h:1"}); err == nil {
|
||||
t.Errorf("expected error for missing model")
|
||||
}
|
||||
if _, err := NewComfy("", map[string]any{"base_url": "http://h:1", "model": "m"}); err == nil {
|
||||
t.Errorf("expected error for empty instance name")
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyHappyPath(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.historyReadyAfter = 2 // exercise the polling loop
|
||||
c := newComfy(t, f)
|
||||
|
||||
res, err := c.Generate(context.Background(), Request{
|
||||
Prompt: "a small fishbowl with a cat",
|
||||
Width: 512,
|
||||
Height: 512,
|
||||
Steps: 4,
|
||||
Seed: 1234567,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
defer res.ImageReader.Close()
|
||||
|
||||
if res.MimeType != "image/png" {
|
||||
t.Errorf("mime = %q", res.MimeType)
|
||||
}
|
||||
body, err := io.ReadAll(res.ImageReader)
|
||||
if err != nil {
|
||||
t.Fatalf("read body: %v", err)
|
||||
}
|
||||
if !bytes.Equal(body, f.viewBody) {
|
||||
t.Errorf("image body did not round-trip")
|
||||
}
|
||||
|
||||
if seed, _ := res.Metadata["seed"].(int64); seed != 1234567 {
|
||||
t.Errorf("metadata seed = %v", res.Metadata["seed"])
|
||||
}
|
||||
if model, _ := res.Metadata["model"].(string); model != "flux1-schnell.safetensors" {
|
||||
t.Errorf("metadata model = %v", res.Metadata["model"])
|
||||
}
|
||||
if steps, _ := res.Metadata["steps"].(int); steps != 4 {
|
||||
t.Errorf("metadata steps = %v", res.Metadata["steps"])
|
||||
}
|
||||
if pid, _ := res.Metadata["prompt_id"].(string); pid != "pid-abc" {
|
||||
t.Errorf("metadata prompt_id = %v", res.Metadata["prompt_id"])
|
||||
}
|
||||
if _, ok := res.Metadata["latency_ms"]; !ok {
|
||||
t.Errorf("metadata missing latency_ms")
|
||||
}
|
||||
// vram_used_mib is best-effort but should be present given our mock stats
|
||||
if vram, _ := res.Metadata["vram_used_mib"].(int64); vram != 8192 {
|
||||
t.Errorf("metadata vram_used_mib = %v, want 8192", res.Metadata["vram_used_mib"])
|
||||
}
|
||||
|
||||
if got := f.historyCalls.Load(); got < 3 {
|
||||
t.Errorf("expected at least 3 /history polls, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyDefaultsAppliedWhenZero(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
c := newComfy(t, f)
|
||||
|
||||
res, err := c.Generate(context.Background(), Request{Prompt: "p"}) // all-zero
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
defer res.ImageReader.Close()
|
||||
_, _ = io.ReadAll(res.ImageReader)
|
||||
|
||||
if w, _ := res.Metadata["width"].(int); w != 1024 {
|
||||
t.Errorf("width default = %v", res.Metadata["width"])
|
||||
}
|
||||
if steps, _ := res.Metadata["steps"].(int); steps != 4 {
|
||||
t.Errorf("steps default = %v", res.Metadata["steps"])
|
||||
}
|
||||
if seed, _ := res.Metadata["seed"].(int64); seed != 42 {
|
||||
t.Errorf("seed default (test rand hook) = %v", res.Metadata["seed"])
|
||||
}
|
||||
if s, _ := res.Metadata["sampler"].(string); s != "euler" {
|
||||
t.Errorf("sampler default = %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyPromptRetriesOnce5xx(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.failPromptUntil = 1
|
||||
f.promptFailStatus = http.StatusBadGateway
|
||||
f.promptFailBody = []byte("upstream busy")
|
||||
c := newComfy(t, f)
|
||||
|
||||
res, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate (with one 502 then OK): %v", err)
|
||||
}
|
||||
defer res.ImageReader.Close()
|
||||
_, _ = io.ReadAll(res.ImageReader)
|
||||
|
||||
if got := f.promptCalls.Load(); got != 2 {
|
||||
t.Errorf("expected exactly 2 /prompt calls (1 fail + 1 retry), got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyPromptGivesUpAfterTwo5xx(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.failPromptUntil = 99 // every call fails
|
||||
f.promptFailStatus = http.StatusServiceUnavailable
|
||||
f.promptFailBody = []byte("nope")
|
||||
c := newComfy(t, f)
|
||||
|
||||
_, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error after sustained 503s")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "503") {
|
||||
t.Errorf("expected error to mention 503, got %v", err)
|
||||
}
|
||||
if got := f.promptCalls.Load(); got != 2 {
|
||||
t.Errorf("expected exactly 2 /prompt calls (no further retries), got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyPromptDoesNotRetryOn4xx(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.failPromptUntil = 99
|
||||
f.promptFailStatus = http.StatusBadRequest
|
||||
f.promptFailBody = []byte(`{"error":{"type":"prompt_outputs_failed_validation"},"node_errors":{"some":"thing"}}`)
|
||||
c := newComfy(t, f)
|
||||
|
||||
_, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for 400")
|
||||
}
|
||||
if got := f.promptCalls.Load(); got != 1 {
|
||||
t.Errorf("expected exactly 1 /prompt call (no retry on 4xx), got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyMissingModelHintsAtSetupDoc(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.failPromptUntil = 99
|
||||
f.promptFailStatus = http.StatusBadRequest
|
||||
f.promptFailBody = []byte(`{"error":{"type":"prompt_outputs_failed_validation","message":"Prompt outputs failed validation"},"node_errors":{"12":{"errors":[{"type":"value_not_in_list","message":"Value not in list","details":"unet_name: 'flux1-schnell.safetensors' not in []"}]}}}`)
|
||||
c := newComfy(t, f)
|
||||
|
||||
_, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
msg := err.Error()
|
||||
if !strings.Contains(msg, "docs/setup-comfyui-mrock.md") {
|
||||
t.Errorf("error should point at the setup doc, got %v", err)
|
||||
}
|
||||
if !strings.Contains(msg, "flux1-schnell.safetensors") {
|
||||
t.Errorf("error should name the missing model, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyMissingModelOn200WithNodeErrors(t *testing.T) {
|
||||
// Older ComfyUI builds 200 a workflow-validation failure.
|
||||
f := newFakeComfy(t)
|
||||
f.promptStatus = http.StatusOK
|
||||
f.promptBody = []byte(`{"prompt_id":"","node_errors":{"12":{"errors":[{"type":"value_not_in_list","details":"unet_name: 'flux1-schnell.safetensors' not in []"}]}}}`)
|
||||
c := newComfy(t, f)
|
||||
|
||||
_, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for node_errors on 200")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "docs/setup-comfyui-mrock.md") {
|
||||
t.Errorf("error should point at the setup doc, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyHistoryErrorSurfaced(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.historyError = true
|
||||
c := newComfy(t, f)
|
||||
|
||||
_, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when history reports execution error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "errored") {
|
||||
t.Errorf("expected 'errored' in message, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyViewFailureSurfaced(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.viewStatus = http.StatusNotFound
|
||||
f.viewBody = []byte("nope")
|
||||
c := newComfy(t, f)
|
||||
|
||||
_, err := c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when /view 404s")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "404") {
|
||||
t.Errorf("expected status code in error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyUnreachableHostMentionsBootHelper(t *testing.T) {
|
||||
be, err := NewComfy("flux-test", map[string]any{
|
||||
"base_url": "http://127.0.0.1:1", // closed port; connection refused
|
||||
"model": "flux1-schnell.safetensors",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewComfy: %v", err)
|
||||
}
|
||||
c := be.(*Comfy)
|
||||
c.httpClient = &http.Client{Timeout: 500 * time.Millisecond}
|
||||
|
||||
_, err = c.Generate(context.Background(), Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unreachable host")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "boot-whitetower mrock") {
|
||||
t.Errorf("expected boot-helper hint, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyContextCancelStopsPolling(t *testing.T) {
|
||||
f := newFakeComfy(t)
|
||||
f.historyReadyAfter = 1_000_000 // never finishes
|
||||
c := newComfy(t, f)
|
||||
c.pollInterval = 5 * time.Millisecond
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
_, err := c.Generate(ctx, Request{Prompt: "p", Width: 64, Height: 64})
|
||||
if err == nil {
|
||||
t.Fatal("expected ctx.Err()")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "context deadline exceeded") {
|
||||
t.Errorf("expected deadline exceeded, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyWorkflowReflectsRequest(t *testing.T) {
|
||||
// Capture the workflow body to assert KSampler + EmptyLatentImage values.
|
||||
var captured []byte
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/prompt":
|
||||
captured, _ = io.ReadAll(r.Body)
|
||||
_, _ = w.Write([]byte(`{"prompt_id":"pid","number":1,"node_errors":{}}`))
|
||||
case "/history/pid":
|
||||
_, _ = w.Write([]byte(`{"pid":{"status":{"completed":true,"status_str":"success"},"outputs":{"9":{"images":[{"filename":"imagen_00001_.png","subfolder":"","type":"output"}]}}}}`))
|
||||
case "/view":
|
||||
_, _ = w.Write(mustPNG(t, 8, 8))
|
||||
case "/system_stats":
|
||||
_, _ = w.Write([]byte(`{"devices":[{"vram_total":1,"vram_free":1}]}`))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
be, err := NewComfy("flux-test", map[string]any{
|
||||
"base_url": srv.URL,
|
||||
"model": "custom.safetensors",
|
||||
"default_steps": 7,
|
||||
"default_sampler": "dpmpp_2m",
|
||||
"default_scheduler": "karras",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewComfy: %v", err)
|
||||
}
|
||||
c := be.(*Comfy)
|
||||
c.pollInterval = time.Millisecond
|
||||
c.randSeed = func() int64 { return 9999 }
|
||||
|
||||
res, err := c.Generate(context.Background(), Request{
|
||||
Prompt: "a cat",
|
||||
NegativePrompt: "blurry",
|
||||
Width: 768,
|
||||
Height: 512,
|
||||
Steps: 11,
|
||||
Seed: 555,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
res.ImageReader.Close()
|
||||
|
||||
var sent struct {
|
||||
Prompt map[string]map[string]any `json:"prompt"`
|
||||
ClientID string `json:"client_id"`
|
||||
}
|
||||
if err := json.Unmarshal(captured, &sent); err != nil {
|
||||
t.Fatalf("unmarshal captured: %v", err)
|
||||
}
|
||||
ks := sent.Prompt["31"]["inputs"].(map[string]any)
|
||||
if ks["seed"].(float64) != 555 {
|
||||
t.Errorf("KSampler seed = %v, want 555", ks["seed"])
|
||||
}
|
||||
if ks["steps"].(float64) != 11 {
|
||||
t.Errorf("KSampler steps = %v, want 11", ks["steps"])
|
||||
}
|
||||
if ks["sampler_name"].(string) != "dpmpp_2m" {
|
||||
t.Errorf("sampler_name = %v", ks["sampler_name"])
|
||||
}
|
||||
if ks["scheduler"].(string) != "karras" {
|
||||
t.Errorf("scheduler = %v", ks["scheduler"])
|
||||
}
|
||||
latent := sent.Prompt["27"]["inputs"].(map[string]any)
|
||||
if latent["width"].(float64) != 768 || latent["height"].(float64) != 512 {
|
||||
t.Errorf("EmptySD3LatentImage size = %vx%v", latent["width"], latent["height"])
|
||||
}
|
||||
unet := sent.Prompt["12"]["inputs"].(map[string]any)
|
||||
if unet["unet_name"].(string) != "custom.safetensors" {
|
||||
t.Errorf("unet_name = %v", unet["unet_name"])
|
||||
}
|
||||
neg := sent.Prompt["13"]["inputs"].(map[string]any)
|
||||
if neg["text"].(string) != "blurry" {
|
||||
t.Errorf("negative prompt not threaded: %v", neg["text"])
|
||||
}
|
||||
if !strings.HasPrefix(sent.ClientID, "imagen-") && sent.ClientID == "" {
|
||||
t.Errorf("client_id should be set: %q", sent.ClientID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComfyTypeIsRegistered(t *testing.T) {
|
||||
if !Default.Has(ComfyType) {
|
||||
t.Errorf("comfyui type not registered in Default")
|
||||
}
|
||||
}
|
||||
@@ -19,11 +19,16 @@ type Config struct {
|
||||
Backends map[string]BackendSpec `yaml:"backends"`
|
||||
}
|
||||
|
||||
// OutputConfig controls where generated images and metadata sidecars land.
|
||||
// OutputConfig describes where generated images and their metadata sidecars
// are written, and whether `imagen generate` opens a tmux preview window.
type OutputConfig struct {
	Directory         string `yaml:"directory"`
	Naming            string `yaml:"naming"`
	WriteMetadataJSON bool   `yaml:"write_metadata_json"`

	// Preview selects the tri-state preview mode: "auto" (the default), "on"
	// or "off". An empty or unset value is treated as "auto". $IMAGEN_PREVIEW
	// and the --preview/--no-preview flags override this in turn.
	Preview string `yaml:"preview"`
}
|
||||
|
||||
// BackendSpec is one entry under `backends:`. Type identifies the adapter;
|
||||
@@ -78,6 +83,11 @@ func (c *Config) Validate() error {
|
||||
return fmt.Errorf("default_backend %q is not defined under backends:", c.DefaultBackend)
|
||||
}
|
||||
}
|
||||
switch c.Output.Preview {
|
||||
case "", "auto", "on", "off":
|
||||
default:
|
||||
return fmt.Errorf("output.preview = %q (must be auto|on|off)", c.Output.Preview)
|
||||
}
|
||||
for name, spec := range c.Backends {
|
||||
if name == "" {
|
||||
return errors.New("empty backend name")
|
||||
@@ -95,22 +105,31 @@ const Sample = `# imagen.yaml — config for the imagen CLI.
|
||||
# implementing the Backend interface, registering its type name, and listing
|
||||
# an instance here.
|
||||
|
||||
default_backend: mock
|
||||
default_backend: flux-schnell-local
|
||||
|
||||
output:
|
||||
directory: ~/Pictures/imagen
|
||||
naming: "{date}-{slug}-{seed}.png"
|
||||
write_metadata_json: true
|
||||
# Open a tmux window with tmux-img after a successful generation.
|
||||
# auto (default): preview iff stdout is a TTY and $TMUX is set.
|
||||
# on: always preview (errors outside a tmux session).
|
||||
# off: never preview (use this for batch / CI callers).
|
||||
preview: auto
|
||||
|
||||
backends:
|
||||
mock:
|
||||
type: mock
|
||||
|
||||
flux-schnell-local:
|
||||
type: comfyui
|
||||
base_url: http://mrock:8188
|
||||
# Filename of the unet checkpoint inside the ComfyUI server's
|
||||
# models/unet/ directory. See docs/setup-comfyui-mrock.md.
|
||||
model: flux1-schnell.safetensors
|
||||
default_steps: 4
|
||||
default_sampler: euler
|
||||
default_scheduler: simple
|
||||
|
||||
mock:
|
||||
type: mock
|
||||
|
||||
flux-dev-replicate:
|
||||
type: replicate
|
||||
|
||||
@@ -16,7 +16,7 @@ func TestLoadAndValidate(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("Load: %v", err)
|
||||
}
|
||||
if cfg.DefaultBackend != "mock" {
|
||||
if cfg.DefaultBackend != "flux-schnell-local" {
|
||||
t.Errorf("default = %q", cfg.DefaultBackend)
|
||||
}
|
||||
mock, ok := cfg.Backends["mock"]
|
||||
@@ -30,9 +30,15 @@ func TestLoadAndValidate(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatalf("flux backend missing")
|
||||
}
|
||||
if flux.Type != "comfyui" {
|
||||
t.Errorf("flux type = %q", flux.Type)
|
||||
}
|
||||
if flux.Raw["base_url"] != "http://mrock:8188" {
|
||||
t.Errorf("flux base_url = %v", flux.Raw["base_url"])
|
||||
}
|
||||
if flux.Raw["model"] != "flux1-schnell.safetensors" {
|
||||
t.Errorf("flux model = %v", flux.Raw["model"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRejectsUnknownDefault(t *testing.T) {
|
||||
@@ -54,6 +60,34 @@ func TestValidateRejectsMissingType(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidatePreviewMode(t *testing.T) {
|
||||
for _, mode := range []string{"", "auto", "on", "off"} {
|
||||
c := &Config{Output: OutputConfig{Preview: mode}}
|
||||
if err := c.Validate(); err != nil {
|
||||
t.Errorf("preview=%q: unexpected error %v", mode, err)
|
||||
}
|
||||
}
|
||||
bad := &Config{Output: OutputConfig{Preview: "yes"}}
|
||||
if err := bad.Validate(); err == nil {
|
||||
t.Errorf("expected error for invalid preview value")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleParsesPreviewAuto(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "imagen.yaml")
|
||||
if err := os.WriteFile(path, []byte(Sample), 0o644); err != nil {
|
||||
t.Fatalf("write sample: %v", err)
|
||||
}
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("Load: %v", err)
|
||||
}
|
||||
if cfg.Output.Preview != "auto" {
|
||||
t.Errorf("Output.Preview = %q, want auto", cfg.Output.Preview)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpandPath(t *testing.T) {
|
||||
home, _ := os.UserHomeDir()
|
||||
cases := map[string]string{
|
||||
|
||||
119
internal/preview/tmux.go
Normal file
119
internal/preview/tmux.go
Normal file
@@ -0,0 +1,119 @@
|
||||
// Package preview opens a tmux window showing a generated image via tmux-img.
|
||||
// Mode resolution and the actual spawn are kept separate so the CLI can
|
||||
// decide-then-act and tests can drive each half independently.
|
||||
package preview
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Mode is the tri-state preview setting.
type Mode string

// The three preview modes.
const (
	ModeAuto Mode = "auto" // the default
	ModeOn   Mode = "on"   // force the preview
	ModeOff  Mode = "off"  // never preview
)
|
||||
|
||||
// ParseMode normalises a string into a Mode. Empty parses to ModeAuto so
|
||||
// callers can pass through unset config / env values.
|
||||
func ParseMode(s string) (Mode, error) {
|
||||
switch strings.ToLower(strings.TrimSpace(s)) {
|
||||
case "", "auto":
|
||||
return ModeAuto, nil
|
||||
case "on":
|
||||
return ModeOn, nil
|
||||
case "off":
|
||||
return ModeOff, nil
|
||||
}
|
||||
return "", fmt.Errorf("invalid preview mode %q (auto|on|off)", s)
|
||||
}
|
||||
|
||||
// Decision answers "should we preview, and why".
type Decision struct {
	ShouldPreview bool   // whether a preview window should be opened
	Reason        string // short explanation of how the decision was reached
}
|
||||
|
||||
// Resolve maps (mode, runtime context) to a Decision.
|
||||
//
|
||||
// - off -> never preview
|
||||
// - on -> preview, but error if not in tmux (forced on outside tmux)
|
||||
// - auto -> preview iff inTmux && stdoutTTY
|
||||
func Resolve(mode Mode, inTmux, stdoutTTY bool) (Decision, error) {
|
||||
switch mode {
|
||||
case ModeOff:
|
||||
return Decision{ShouldPreview: false, Reason: "preview=off"}, nil
|
||||
case ModeOn:
|
||||
if !inTmux {
|
||||
return Decision{}, ErrNoTmuxForced
|
||||
}
|
||||
return Decision{ShouldPreview: true, Reason: "preview=on"}, nil
|
||||
case ModeAuto, "":
|
||||
if !inTmux {
|
||||
return Decision{ShouldPreview: false, Reason: "auto: $TMUX unset"}, nil
|
||||
}
|
||||
if !stdoutTTY {
|
||||
return Decision{ShouldPreview: false, Reason: "auto: stdout not a tty"}, nil
|
||||
}
|
||||
return Decision{ShouldPreview: true, Reason: "auto"}, nil
|
||||
}
|
||||
return Decision{}, fmt.Errorf("invalid preview mode %q", mode)
|
||||
}
|
||||
|
||||
// Sentinel errors returned by Spawn and Resolve. Each message names the
// missing piece and, where relevant, where to install it.
var (
	// ErrTmuxMissing: the tmux binary could not be located.
	ErrTmuxMissing = errors.New("tmux: binary not found on $PATH (required for image preview)")
	// ErrTmuxImgMissing: the tmux-img binary could not be located.
	ErrTmuxImgMissing = errors.New("tmux-img: binary not found on $PATH (install at ~/.local/bin/tmux-img)")
	// ErrNoTmuxForced: preview was forced on outside a tmux session.
	ErrNoTmuxForced = errors.New("--preview requires $TMUX (are you in a tmux session?)")
)
|
||||
|
||||
// Spawner opens the tmux preview window. Both fields are optional hooks that
// let tests substitute fakes without touching $PATH; a nil hook makes Spawn
// fall back to exec.LookPath and (*exec.Cmd).Run respectively.
type Spawner struct {
	LookPath func(string) (string, error) // resolves a binary name to a path
	Run      func(*exec.Cmd) error        // executes the prepared command
}
|
||||
|
||||
// Spawn opens a new tmux window named img:<slug> running tmux-img --hold
|
||||
// <imagePath>. -d keeps focus in the current pane. Caller is expected to
|
||||
// have already verified that we are inside a tmux session.
|
||||
func (s *Spawner) Spawn(imagePath, slug string) error {
|
||||
look := s.LookPath
|
||||
if look == nil {
|
||||
look = exec.LookPath
|
||||
}
|
||||
run := s.Run
|
||||
if run == nil {
|
||||
run = func(c *exec.Cmd) error { return c.Run() }
|
||||
}
|
||||
|
||||
tmuxBin, err := look("tmux")
|
||||
if err != nil {
|
||||
return ErrTmuxMissing
|
||||
}
|
||||
tmuxImgBin, err := look("tmux-img")
|
||||
if err != nil {
|
||||
return ErrTmuxImgMissing
|
||||
}
|
||||
|
||||
name := "img:" + slug
|
||||
shellCmd := fmt.Sprintf("%s --hold %s",
|
||||
shellQuote(tmuxImgBin), shellQuote(imagePath))
|
||||
cmd := exec.Command(tmuxBin, "new-window", "-d", "-n", name, shellCmd)
|
||||
if err := run(cmd); err != nil {
|
||||
return fmt.Errorf("tmux new-window: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// shellQuote wraps s in single quotes for /bin/sh, rewriting every embedded
// single quote as '\'' (close quote, escaped quote, reopen quote). It exists
// because tmux passes the trailing argument of new-window through a shell.
func shellQuote(s string) string {
	var quoted strings.Builder
	quoted.WriteByte('\'')
	for i := 0; i < len(s); i++ {
		if s[i] == '\'' {
			quoted.WriteString(`'\''`)
			continue
		}
		quoted.WriteByte(s[i])
	}
	quoted.WriteByte('\'')
	return quoted.String()
}
|
||||
170
internal/preview/tmux_test.go
Normal file
170
internal/preview/tmux_test.go
Normal file
@@ -0,0 +1,170 @@
|
||||
package preview
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseMode(t *testing.T) {
|
||||
cases := map[string]Mode{
|
||||
"": ModeAuto,
|
||||
"auto": ModeAuto,
|
||||
"AUTO": ModeAuto,
|
||||
"on": ModeOn,
|
||||
" on ": ModeOn,
|
||||
"off": ModeOff,
|
||||
}
|
||||
for in, want := range cases {
|
||||
got, err := ParseMode(in)
|
||||
if err != nil {
|
||||
t.Errorf("ParseMode(%q) err = %v", in, err)
|
||||
continue
|
||||
}
|
||||
if got != want {
|
||||
t.Errorf("ParseMode(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
if _, err := ParseMode("nope"); err == nil {
|
||||
t.Errorf("ParseMode(nope) should have errored")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolve(t *testing.T) {
|
||||
type tc struct {
|
||||
mode Mode
|
||||
inTmux bool
|
||||
stdoutTTY bool
|
||||
want bool
|
||||
wantErr error
|
||||
}
|
||||
cases := map[string]tc{
|
||||
"off-anywhere": {ModeOff, false, false, false, nil},
|
||||
"off-in-tmux-tty": {ModeOff, true, true, false, nil},
|
||||
"on-in-tmux": {ModeOn, true, false, true, nil},
|
||||
"on-outside-tmux-errs": {ModeOn, false, true, false, ErrNoTmuxForced},
|
||||
"auto-no-tmux": {ModeAuto, false, true, false, nil},
|
||||
"auto-tmux-no-tty": {ModeAuto, true, false, false, nil},
|
||||
"auto-tmux-and-tty": {ModeAuto, true, true, true, nil},
|
||||
}
|
||||
for name, c := range cases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
d, err := Resolve(c.mode, c.inTmux, c.stdoutTTY)
|
||||
if c.wantErr != nil {
|
||||
if !errors.Is(err, c.wantErr) {
|
||||
t.Fatalf("err = %v, want %v", err, c.wantErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("err = %v", err)
|
||||
}
|
||||
if d.ShouldPreview != c.want {
|
||||
t.Errorf("ShouldPreview = %v, want %v (reason: %s)", d.ShouldPreview, c.want, d.Reason)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpawn_BuildsCorrectCommand(t *testing.T) {
|
||||
var captured *exec.Cmd
|
||||
s := &Spawner{
|
||||
LookPath: func(name string) (string, error) {
|
||||
switch name {
|
||||
case "tmux":
|
||||
return "/usr/bin/tmux", nil
|
||||
case "tmux-img":
|
||||
return "/home/m/.local/bin/tmux-img", nil
|
||||
}
|
||||
return "", exec.ErrNotFound
|
||||
},
|
||||
Run: func(c *exec.Cmd) error {
|
||||
captured = c
|
||||
return nil
|
||||
},
|
||||
}
|
||||
if err := s.Spawn("/tmp/imagen/cat.png", "cat-in-a-fishbowl"); err != nil {
|
||||
t.Fatalf("Spawn: %v", err)
|
||||
}
|
||||
if captured == nil {
|
||||
t.Fatal("Run was not called")
|
||||
}
|
||||
if captured.Path != "/usr/bin/tmux" {
|
||||
t.Errorf("Path = %q, want /usr/bin/tmux", captured.Path)
|
||||
}
|
||||
args := captured.Args
|
||||
if len(args) < 6 {
|
||||
t.Fatalf("args = %v (need at least 6)", args)
|
||||
}
|
||||
// tmux new-window -d -n img:<slug> '<shell-cmd>'
|
||||
if args[1] != "new-window" {
|
||||
t.Errorf("args[1] = %q, want new-window", args[1])
|
||||
}
|
||||
if args[2] != "-d" {
|
||||
t.Errorf("args[2] = %q, want -d", args[2])
|
||||
}
|
||||
if args[3] != "-n" {
|
||||
t.Errorf("args[3] = %q, want -n", args[3])
|
||||
}
|
||||
if args[4] != "img:cat-in-a-fishbowl" {
|
||||
t.Errorf("args[4] = %q, want img:cat-in-a-fishbowl", args[4])
|
||||
}
|
||||
shellCmd := args[5]
|
||||
if !strings.Contains(shellCmd, "tmux-img") || !strings.Contains(shellCmd, "--hold") || !strings.Contains(shellCmd, "/tmp/imagen/cat.png") {
|
||||
t.Errorf("shell cmd %q missing expected pieces", shellCmd)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpawn_PathWithSpacesAndQuotes(t *testing.T) {
|
||||
var captured *exec.Cmd
|
||||
s := &Spawner{
|
||||
LookPath: func(name string) (string, error) {
|
||||
if name == "tmux" {
|
||||
return "/usr/bin/tmux", nil
|
||||
}
|
||||
if name == "tmux-img" {
|
||||
return "/usr/local/bin/tmux-img", nil
|
||||
}
|
||||
return "", exec.ErrNotFound
|
||||
},
|
||||
Run: func(c *exec.Cmd) error { captured = c; return nil },
|
||||
}
|
||||
weird := "/tmp/imagen/o'malley's cat.png"
|
||||
if err := s.Spawn(weird, "slug"); err != nil {
|
||||
t.Fatalf("Spawn: %v", err)
|
||||
}
|
||||
shellCmd := captured.Args[5]
|
||||
// Single-quoted with the embedded apostrophe escaped via the
|
||||
// '\'' shell idiom — confirm we did not just splice the raw path.
|
||||
if strings.Contains(shellCmd, "o'malley's") {
|
||||
t.Errorf("shell cmd %q contains unescaped apostrophes", shellCmd)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpawn_MissingTmux(t *testing.T) {
|
||||
s := &Spawner{
|
||||
LookPath: func(string) (string, error) { return "", exec.ErrNotFound },
|
||||
Run: func(*exec.Cmd) error { return nil },
|
||||
}
|
||||
err := s.Spawn("/x.png", "s")
|
||||
if !errors.Is(err, ErrTmuxMissing) {
|
||||
t.Errorf("err = %v, want ErrTmuxMissing", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpawn_MissingTmuxImg(t *testing.T) {
|
||||
s := &Spawner{
|
||||
LookPath: func(name string) (string, error) {
|
||||
if name == "tmux" {
|
||||
return "/usr/bin/tmux", nil
|
||||
}
|
||||
return "", exec.ErrNotFound
|
||||
},
|
||||
Run: func(*exec.Cmd) error { return nil },
|
||||
}
|
||||
err := s.Spawn("/x.png", "s")
|
||||
if !errors.Is(err, ErrTmuxImgMissing) {
|
||||
t.Errorf("err = %v, want ErrTmuxImgMissing", err)
|
||||
}
|
||||
}
|
||||
24
scripts/comfyui.service
Normal file
24
scripts/comfyui.service
Normal file
@@ -0,0 +1,24 @@
|
||||
# systemd unit that runs the ComfyUI server out of /home/m/dev/comfyui
# as user/group "m".
[Unit]
|
||||
Description=ComfyUI image generation server
|
||||
Documentation=https://github.com/comfyanonymous/ComfyUI
|
||||
# Order after network-online.target and pull it in (After= plus Wants=).
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=m
|
||||
Group=m
|
||||
WorkingDirectory=/home/m/dev/comfyui
|
||||
# Starts main.py with the checkout's own virtualenv interpreter on port 8188,
# with explicit output and temp directories inside the checkout.
# NOTE(review): --listen 0.0.0.0 is the wildcard address, so the server is
# presumably reachable from other hosts — confirm this exposure is intended.
ExecStart=/home/m/dev/comfyui/.venv/bin/python /home/m/dev/comfyui/main.py \
|
||||
--listen 0.0.0.0 --port 8188 \
|
||||
--output-directory /home/m/dev/comfyui/output \
|
||||
--temp-directory /home/m/dev/comfyui/temp
|
||||
# Restart only after a failed exit, waiting 5 seconds between attempts;
# allow up to 30 seconds for a stop before systemd gives up waiting.
Restart=on-failure
|
||||
RestartSec=5
|
||||
TimeoutStopSec=30
|
||||
# Sandboxing and resource limits for the service process.
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
LimitNOFILE=65535
|
||||
|
||||
[Install]
|
||||
# Enabling the unit hooks it into multi-user.target.
WantedBy=multi-user.target
|
||||
37
scripts/download-flux-schnell.sh
Executable file
37
scripts/download-flux-schnell.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
# Download FLUX.1 schnell + accompanying VAE/text encoders into a ComfyUI tree.
|
||||
# Uses ungated mirrors — the official Black-Forest-Labs repo is gated and
|
||||
# requires an HF token. See docs/setup-comfyui-mrock.md.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Models root: first argument if given, otherwise the default ComfyUI tree
# under $HOME.
ROOT="${1:-$HOME/dev/comfyui/models}"

# The root itself is never created here; bail out if it is missing.
[ -d "$ROOT" ] || {
  echo "models root $ROOT does not exist — pass it as the first argument" >&2
  exit 1
}

# Subdirectories the downloads below are written into.
mkdir -p "$ROOT/unet" "$ROOT/vae" "$ROOT/clip"

# Download sources (Hugging Face "resolve/main" URLs).
CKPT="https://huggingface.co/Comfy-Org/flux1-schnell/resolve/main/flux1-schnell.safetensors"
VAE="https://huggingface.co/sirorable/flux-ae-vae/resolve/main/ae.safetensors"
CLIP_L="https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors"
T5="https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors"
|
||||
|
||||
# dl URL DEST — download URL to DEST unless DEST already exists and is
# non-empty.
#
# The transfer is written to "DEST.part" and renamed to DEST only after curl
# succeeds. Writing straight to DEST would leave a non-empty partial file
# behind after an interrupted run, which the "already present" check would
# then mistake for a finished download. Keeping the partial data under a
# separate name also lets `curl -C -` resume it on the next run.
#
# Under `set -e` a curl failure aborts the script and leaves DEST.part in
# place for the next attempt.
dl() {
  local url=$1 dest=$2
  local part="$dest.part"
  if [ -s "$dest" ]; then
    echo "skip $dest (already present)"
    return
  fi
  echo "downloading $url -> $dest"
  curl -L --fail --retry 3 --retry-delay 5 -C - -o "$part" "$url"
  mv -f -- "$part" "$dest"
}
|
||||
|
||||
# Fetch each file into its subdirectory of $ROOT: the checkpoint into unet/,
# the VAE into vae/, and both text encoders into clip/.
dl "$CKPT" "$ROOT/unet/flux1-schnell.safetensors"
|
||||
dl "$VAE" "$ROOT/vae/ae.safetensors"
|
||||
dl "$CLIP_L" "$ROOT/clip/clip_l.safetensors"
|
||||
dl "$T5" "$ROOT/clip/t5xxl_fp8_e4m3fn.safetensors"
|
||||
|
||||
echo "done"
|
||||
87
scripts/flux-schnell-poc.json
Normal file
87
scripts/flux-schnell-poc.json
Normal file
@@ -0,0 +1,87 @@
|
||||
{
|
||||
"prompt": {
|
||||
"6": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": {
|
||||
"text": "a small fishbowl with a cat staring out, photo, soft light",
|
||||
"clip": ["11", 0]
|
||||
}
|
||||
},
|
||||
"8": {
|
||||
"class_type": "VAEDecode",
|
||||
"inputs": {
|
||||
"samples": ["31", 0],
|
||||
"vae": ["10", 0]
|
||||
}
|
||||
},
|
||||
"9": {
|
||||
"class_type": "SaveImage",
|
||||
"inputs": {
|
||||
"filename_prefix": "imagen-poc",
|
||||
"images": ["8", 0]
|
||||
}
|
||||
},
|
||||
"10": {
|
||||
"class_type": "VAELoader",
|
||||
"inputs": {
|
||||
"vae_name": "ae.safetensors"
|
||||
}
|
||||
},
|
||||
"11": {
|
||||
"class_type": "DualCLIPLoader",
|
||||
"inputs": {
|
||||
"clip_name1": "t5xxl_fp8_e4m3fn.safetensors",
|
||||
"clip_name2": "clip_l.safetensors",
|
||||
"type": "flux"
|
||||
}
|
||||
},
|
||||
"12": {
|
||||
"class_type": "UNETLoader",
|
||||
"inputs": {
|
||||
"unet_name": "flux1-schnell.safetensors",
|
||||
"weight_dtype": "fp8_e4m3fn"
|
||||
}
|
||||
},
|
||||
"13": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": {
|
||||
"text": "",
|
||||
"clip": ["11", 0]
|
||||
}
|
||||
},
|
||||
"27": {
|
||||
"class_type": "EmptySD3LatentImage",
|
||||
"inputs": {
|
||||
"width": 1024,
|
||||
"height": 1024,
|
||||
"batch_size": 1
|
||||
}
|
||||
},
|
||||
"30": {
|
||||
"class_type": "ModelSamplingFlux",
|
||||
"inputs": {
|
||||
"model": ["12", 0],
|
||||
"max_shift": 1.15,
|
||||
"base_shift": 0.5,
|
||||
"width": 1024,
|
||||
"height": 1024
|
||||
}
|
||||
},
|
||||
"31": {
|
||||
"class_type": "KSampler",
|
||||
"inputs": {
|
||||
"model": ["30", 0],
|
||||
"seed": 1234567,
|
||||
"steps": 4,
|
||||
"cfg": 1.0,
|
||||
"sampler_name": "euler",
|
||||
"scheduler": "simple",
|
||||
"denoise": 1.0,
|
||||
"positive": ["6", 0],
|
||||
"negative": ["13", 0],
|
||||
"latent_image": ["27", 0]
|
||||
}
|
||||
}
|
||||
},
|
||||
"client_id": "imagen-poc-001"
|
||||
}
|
||||
Reference in New Issue
Block a user