diff --git a/cmd/imagen/compare.go b/cmd/imagen/compare.go new file mode 100644 index 0000000..c7b5ba4 --- /dev/null +++ b/cmd/imagen/compare.go @@ -0,0 +1,386 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "image" + "image/color" + "image/draw" + "image/png" + "io" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "golang.org/x/image/font" + "golang.org/x/image/font/basicfont" + "golang.org/x/image/math/fixed" + + "mgit.msbls.de/m/ImaGen/internal/backend" + "mgit.msbls.de/m/ImaGen/internal/config" + "mgit.msbls.de/m/ImaGen/internal/output" + "mgit.msbls.de/m/ImaGen/internal/prompt" +) + +// runCompare implements `imagen compare "<prompt>" --models a,b,c --output <dir>`. +// +// Each backend in --models runs sequentially against the same prompt (mRock +// has a single GPU; parallelising would just OOM). Each generation lands as +// a backend-suffixed file in a per-run directory under the output dir; a +// contact sheet stitches them together into one PNG with the backend name +// in a label band under each cell. A sidecar JSON next to the contact sheet +// lists every generation with its per-model metadata (latency, seed, model +// file, VRAM peak). 
+func runCompare(ctx context.Context, args []string) error {
+	fs := flag.NewFlagSet("compare", flag.ContinueOnError)
+	var (
+		modelsCSV  string
+		size       string
+		outDir     string
+		style      string
+		negative   string
+		seed       int64
+		steps      int
+		configPath string
+		noContact  bool
+	)
+	fs.StringVar(&modelsCSV, "models", "", "comma-separated backend instance names (required)")
+	fs.StringVar(&size, "size", "1024x1024", "WxH for every backend")
+	fs.StringVar(&outDir, "output", "", "directory to write the images + contact sheet (default: ~/Pictures/imagen/compare)")
+	fs.StringVar(&style, "style", "", "style preset applied to the prompt before dispatching to each backend")
+	fs.StringVar(&negative, "negative", "", "negative prompt (forwarded to every backend that supports it)")
+	fs.Int64Var(&seed, "seed", 0, "deterministic seed for every backend (0 = each backend rolls its own)")
+	fs.IntVar(&steps, "steps", 0, "diffusion steps (0 = each backend's default)")
+	fs.StringVar(&configPath, "config", "", "config file path (default: ~/.config/imagen.yaml)")
+	fs.BoolVar(&noContact, "no-contact-sheet", false, "skip the composite PNG; only write per-backend images + sidecar")
+	fs.Usage = func() {
+		fmt.Fprintln(fs.Output(), `Usage: imagen compare "<prompt>" --models a,b,c [flags]`)
+		fs.PrintDefaults()
+	}
+	// The prompt may come before the flags; stdlib flag parsing stops at the
+	// first positional, so the leading words are split off before Parse.
+	leadingPositional, flagArgs := splitLeadingPositional(args)
+	if err := fs.Parse(flagArgs); err != nil {
+		return err
+	}
+	// Cap the slice before appending so the append always copies instead of
+	// writing into a backing array that may still be shared with args.
+	positional := append(leadingPositional[:len(leadingPositional):len(leadingPositional)], fs.Args()...)
+	if len(positional) == 0 {
+		fs.Usage()
+		return userErr("missing prompt")
+	}
+	rawPrompt := strings.Join(positional, " ")
+	modelNames := splitCSV(modelsCSV)
+	if len(modelNames) == 0 {
+		return userErr("--models is required (comma-separated backend instance names)")
+	}
+
+	w, h, err := parseSize(size)
+	if err != nil {
+		return userErr("bad --size: %v", err)
+	}
+
+	// A missing config file is tolerated; any other load error is fatal.
+	cfg, cfgErr := config.Load(configPath)
+	if cfgErr != nil && !os.IsNotExist(cfgErr) {
+		return cfgErr
+	}
+
+	if outDir == "" {
+		// Without a home directory the default would silently become a path
+		// relative to the current working directory, so fail loudly instead.
+		home, err := os.UserHomeDir()
+		if err != nil {
+			return fmt.Errorf("resolve default output dir: %w", err)
+		}
+		outDir = filepath.Join(home, "Pictures", "imagen", "compare")
+	}
+	outDir = config.ExpandPath(outDir)
+
+	finalPrompt, err := prompt.Apply(rawPrompt, style)
+	if err != nil {
+		return userErr("%v", err)
+	}
+
+	// One directory per run: <timestamp>-<prompt slug> under the output dir.
+	runID := time.Now().Format("20060102-150405")
+	runDir := filepath.Join(outDir, runID+"-"+output.Slug(rawPrompt))
+	if err := os.MkdirAll(runDir, 0o755); err != nil {
+		return fmt.Errorf("mkdir %s: %w", runDir, err)
+	}
+
+	results := make([]compareResult, 0, len(modelNames))
+	for i, name := range modelNames {
+		fmt.Fprintf(os.Stderr, "[%d/%d] %s ...\n", i+1, len(modelNames), name)
+		res, err := generateOne(ctx, cfg, name, finalPrompt, negative, w, h, seed, steps, runDir, rawPrompt)
+		if err != nil {
+			// Don't abort the whole run on a single backend failure — record
+			// the error and continue. flexsiebels-style consumers want to
+			// see N-1 results rather than zero when one model is offline.
+			fmt.Fprintf(os.Stderr, " failed: %v\n", err)
+			results = append(results, compareResult{Backend: name, Error: err.Error()})
+			continue
+		}
+		fmt.Fprintf(os.Stderr, " %s (%d ms)\n", res.ImagePath, res.LatencyMs)
+		results = append(results, res)
+	}
+
+	// The sidecar JSON inside the run dir captures every attempt.
+	sidecar := filepath.Join(runDir, "compare.json")
+	if err := writeCompareSidecar(sidecar, rawPrompt, style, negative, w, h, seed, steps, results); err != nil {
+		return err
+	}
+	fmt.Fprintln(os.Stderr, "sidecar:", sidecar)
+
+	// Contact sheet stitches the successful results together. If every
+	// backend failed there's nothing to draw, so skip it and say so on stderr.
+	if !noContact {
+		successes := successfulResults(results)
+		if len(successes) > 0 {
+			sheet := filepath.Join(runDir, "contact-sheet.png")
+			if err := writeContactSheet(sheet, rawPrompt, successes); err != nil {
+				return fmt.Errorf("contact sheet: %w", err)
+			}
+			// The sheet path is the only thing written to stdout.
+			fmt.Println(sheet)
+		} else {
+			fmt.Fprintln(os.Stderr, "imagen compare: all backends failed; no contact sheet written")
+		}
+	}
+	return nil
+}
+
+// compareResult is one backend's output in a comparison run. Error is set
+// when Generate failed for this backend; ImagePath + Metadata are empty in
+// that case.
+type compareResult struct {
+	Backend     string         `json:"backend"`
+	ImagePath   string         `json:"image_path,omitempty"`
+	Seed        int64          `json:"seed"`
+	LatencyMs   int64          `json:"latency_ms,omitempty"`
+	Model       string         `json:"model,omitempty"`
+	VRAMUsedMiB int64          `json:"vram_used_mib,omitempty"`
+	Metadata    map[string]any `json:"metadata,omitempty"`
+	Error       string         `json:"error,omitempty"`
+}
+
+// generateOne builds the backend instance called name, runs one generation
+// with the shared request parameters, and writes the returned image into
+// runDir as <prompt-slug>--<backend-slug>.<ext>. On failure only Backend is
+// populated in the returned compareResult.
+//
+// NOTE: the file name depends only on rawPrompt and name, so listing the same
+// backend twice in --models makes the later run overwrite the earlier image.
+func generateOne(ctx context.Context, cfg *config.Config, name, finalPrompt, negative string, w, h int, seed int64, steps int, runDir, rawPrompt string) (compareResult, error) {
+	be, err := buildBackend(cfg, name)
+	if err != nil {
+		return compareResult{Backend: name}, err
+	}
+	attachUsageSink(be)
+
+	req := backend.Request{
+		Prompt:         finalPrompt,
+		NegativePrompt: negative,
+		Width:          w,
+		Height:         h,
+		Steps:          steps,
+		Seed:           seed,
+	}
+	res, err := be.Generate(ctx, req)
+	if err != nil {
+		return compareResult{Backend: name}, err
+	}
+	defer res.ImageReader.Close()
+
+	imgBytes, err := io.ReadAll(res.ImageReader)
+	if err != nil {
+		return compareResult{Backend: name}, fmt.Errorf("read image: %w", err)
+	}
+
+	imgPath := filepath.Join(runDir, output.Slug(rawPrompt)+"--"+output.Slug(name)+"."+extFromMime(res.MimeType))
+	if err := os.WriteFile(imgPath, imgBytes, 0o644); err != nil {
+		return compareResult{Backend: name}, fmt.Errorf("write %s: %w", imgPath, err)
+	}
+
+	return compareResult{
+		Backend:   name,
+		ImagePath: imgPath,
+		Seed:      seedFromMetadata(res.Metadata, seed),
+		LatencyMs: metaInt64(res.Metadata, "latency_ms"),
+		Model:     metaString(res.Metadata, "model"),
+		// Read through metaInt64 like latency_ms: a bare .(int64) assertion
+		// drops the value whenever the backend stored it as int or float64.
+		VRAMUsedMiB: metaInt64(res.Metadata, "vram_used_mib"),
+		Metadata:    res.Metadata,
+	}, nil
+}
+
+// successfulResults returns the entries that have no error and an image on
+// disk, in their original order.
+func successfulResults(rs []compareResult) []compareResult {
+	out := make([]compareResult, 0, len(rs))
+	for _, r := range rs {
+		if r.Error == "" && r.ImagePath != "" {
+			out = append(out, r)
+		}
+	}
+	return out
+}
+
+// writeCompareSidecar writes the run summary as indented JSON to path: the
+// request parameters, every per-backend result (failures included), the
+// sorted backend names and the success/total counts.
+func writeCompareSidecar(path, rawPrompt, style, negative string, w, h int, seed int64, steps int, results []compareResult) error {
+	body := map[string]any{
+		"timestamp":  time.Now().UTC().Format(time.RFC3339),
+		"prompt":     rawPrompt,
+		"style":      style,
+		"negative":   negative,
+		"width":      w,
+		"height":     h,
+		"seed":       seed,
+		"steps":      steps,
+		"results":    results,
+		"backends":   backendNames(results),
+		"successful": len(successfulResults(results)),
+		"total":      len(results),
+	}
+	data, err := json.MarshalIndent(body, "", " ")
+	if err != nil {
+		return fmt.Errorf("marshal sidecar: %w", err)
+	}
+	return os.WriteFile(path, append(data, '\n'), 0o644)
+}
+
+// backendNames returns the Backend field of every result, sorted
+// alphabetically (so not in dispatch order).
+func backendNames(rs []compareResult) []string {
+	out := make([]string, len(rs))
+	for i, r := range rs {
+		out[i] = r.Backend
+	}
+	sort.Strings(out)
+	return out
+}
+
+// writeContactSheet stitches a grid of (image, label) cells into one PNG.
+// Every cell is as wide and as tall as the largest input image; images are
+// never rescaled, and a smaller image is centred inside its cell. The sheet
+// therefore grows with the inputs rather than targeting a fixed width.
+//
+// The grid is a single horizontal row when N <= 4; otherwise it is 2 columns
+// wide with ceil(N/2) rows. This is a contact sheet, not a fancy gallery —
+// readability for side-by-side eyeballing is the goal.
+//
+// It returns an error when results is empty, when an image cannot be read
+// back from disk, or when the PNG cannot be created, encoded or flushed.
+func writeContactSheet(path, prompt string, results []compareResult) error {
+	if len(results) == 0 {
+		return fmt.Errorf("no successful results to lay out")
+	}
+	cells := make([]contactCell, 0, len(results))
+	for _, r := range results {
+		img, err := readPNG(r.ImagePath)
+		if err != nil {
+			return fmt.Errorf("read %s: %w", r.ImagePath, err)
+		}
+		cells = append(cells, contactCell{
+			Image:    img,
+			Label:    r.Backend,
+			SubLabel: fmt.Sprintf("%dms · seed %d", r.LatencyMs, r.Seed),
+		})
+	}
+
+	cols := len(cells)
+	if cols > 4 {
+		cols = 2
+	}
+	rows := (len(cells) + cols - 1) / cols // ceil(N / cols)
+
+	const labelH = 64
+	const pad = 16
+
+	// The cell size is the largest width and height over all images.
+	cellW := cells[0].Image.Bounds().Dx()
+	cellH := cells[0].Image.Bounds().Dy()
+	for _, c := range cells {
+		if w := c.Image.Bounds().Dx(); w > cellW {
+			cellW = w
+		}
+		if h := c.Image.Bounds().Dy(); h > cellH {
+			cellH = h
+		}
+	}
+
+	totalW := cols*cellW + (cols+1)*pad
+	totalH := rows*(cellH+labelH) + (rows+1)*pad + 48 // header band
+
+	canvas := image.NewRGBA(image.Rect(0, 0, totalW, totalH))
+	draw.Draw(canvas, canvas.Bounds(), &image.Uniform{C: color.RGBA{R: 30, G: 30, B: 35, A: 255}}, image.Point{}, draw.Src)
+
+	// Header: show the truncated prompt.
+	headerText := "imagen compare — " + truncate(prompt, 100)
+	drawText(canvas, headerText, pad, 30, color.RGBA{R: 240, G: 240, B: 245, A: 255})
+
+	for i, c := range cells {
+		col := i % cols
+		row := i / cols
+		x0 := pad + col*(cellW+pad)
+		y0 := 48 + pad + row*(cellH+labelH+pad)
+		// Center the image inside the cell when smaller than the max cell size.
+		iw := c.Image.Bounds().Dx()
+		ih := c.Image.Bounds().Dy()
+		offX := (cellW - iw) / 2
+		offY := (cellH - ih) / 2
+		dstRect := image.Rect(x0+offX, y0+offY, x0+offX+iw, y0+offY+ih)
+		draw.Draw(canvas, dstRect, c.Image, c.Image.Bounds().Min, draw.Src)
+
+		// Label band underneath.
+		labelY := y0 + cellH + 20
+		drawText(canvas, c.Label, x0+8, labelY, color.RGBA{R: 250, G: 250, B: 250, A: 255})
+		drawText(canvas, c.SubLabel, x0+8, labelY+22, color.RGBA{R: 180, G: 180, B: 190, A: 255})
+	}
+
+	f, err := os.Create(path)
+	if err != nil {
+		return fmt.Errorf("create %s: %w", path, err)
+	}
+	if err := png.Encode(f, canvas); err != nil {
+		f.Close() // best effort; the encode error is the one worth reporting
+		return fmt.Errorf("encode %s: %w", path, err)
+	}
+	// Close flushes the file; its error must not be dropped for a file we wrote.
+	return f.Close()
+}
+
+// contactCell is one grid entry: the decoded image plus the two text lines
+// drawn in the label band under it.
+type contactCell struct {
+	Image    image.Image
+	Label    string
+	SubLabel string
+}
+
+// readPNG opens path and decodes it with image.Decode, which only knows the
+// formats whose decoders are registered; this file imports image/png only.
+func readPNG(path string) (image.Image, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	img, _, err := image.Decode(f)
+	return img, err
+}
+
+// asciiLabelReplacer maps the non-ASCII punctuation this file puts into
+// labels onto ASCII stand-ins. basicfont.Face7x13 only carries printable
+// ASCII plus U+FFFD, so these runes would otherwise draw as replacement boxes.
+var asciiLabelReplacer = strings.NewReplacer("—", "-", "·", "|", "…", "...")
+
+// drawText draws s onto dst in colour c with the 7x13 bitmap font; (x, y) is
+// the baseline origin of the first glyph. Other non-ASCII runes in s (for
+// example from the user's prompt) still render as the replacement glyph.
+func drawText(dst *image.RGBA, s string, x, y int, c color.Color) {
+	drawer := &font.Drawer{
+		Dst:  dst,
+		Src:  &image.Uniform{C: c},
+		Face: basicfont.Face7x13,
+		Dot:  fixed.Point26_6{X: fixed.I(x), Y: fixed.I(y)},
+	}
+	drawer.DrawString(asciiLabelReplacer.Replace(s))
+}
+
+// truncate shortens s to at most max runes, replacing the last kept rune
+// with "…" when something was cut. It counts runes, not bytes, so a
+// multi-byte character is never split; max <= 0 yields "".
+func truncate(s string, max int) string {
+	if max <= 0 {
+		return ""
+	}
+	runes := []rune(s)
+	if len(runes) <= max {
+		return s
+	}
+	return string(runes[:max-1]) + "…"
+}
+
+// splitCSV splits s on commas, trims whitespace around each part and drops
+// empty parts; an empty or all-separator input yields an empty slice.
+func splitCSV(s string) []string {
+	parts := strings.Split(s, ",")
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		p = strings.TrimSpace(p)
+		if p != "" {
+			out = append(out, p)
+		}
+	}
+	return out
+}
+
+// metaInt64 reads m[key] as an int64. It accepts int64, int and float64
+// (truncated toward zero) and returns 0 for a missing key or any other type.
+func metaInt64(m map[string]any, key string) int64 {
+	v, ok := m[key]
+	if !ok {
+		return 0
+	}
+	switch n := v.(type) {
+	case int64:
+		return n
+	case int:
+		return int64(n)
+	case float64:
+		return int64(n)
+	}
+	return 0
+}
diff --git a/cmd/imagen/compare_test.go b/cmd/imagen/compare_test.go
new file mode 100644
index 0000000..cb9152b
--- /dev/null
+++ b/cmd/imagen/compare_test.go
@@ -0,0 +1,203 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"image/png"
+	"os"
+ "path/filepath" + "strings" + "testing" +) + +// runCompareWithEnv runs the compare subcommand in a writable tmpdir, with +// XDG_CONFIG_HOME pointing somewhere empty so no host imagen.yaml leaks in. +func runCompareWithEnv(t *testing.T, args []string) (stderr, stdout *bytes.Buffer, runDir string, err error) { + t.Helper() + tmp := t.TempDir() + t.Setenv("XDG_CONFIG_HOME", filepath.Join(tmp, "no-config")) + t.Setenv("HOME", tmp) + + out := filepath.Join(tmp, "compare") + // stdlib flag parsing requires flags after the leading positional. Append + // --output at the end so any caller-supplied flags still parse cleanly. + args = append(args, "--output", out) + + // Capture stdout/stderr via os pipes since runCompare writes directly. + oldStdout := os.Stdout + oldStderr := os.Stderr + rOut, wOut, _ := os.Pipe() + rErr, wErr, _ := os.Pipe() + os.Stdout = wOut + os.Stderr = wErr + defer func() { + os.Stdout = oldStdout + os.Stderr = oldStderr + }() + + cmdErr := runCompare(context.Background(), args) + + _ = wOut.Close() + _ = wErr.Close() + stdout = &bytes.Buffer{} + stderr = &bytes.Buffer{} + _, _ = stdout.ReadFrom(rOut) + _, _ = stderr.ReadFrom(rErr) + + entries, _ := os.ReadDir(out) + if len(entries) == 1 { + runDir = filepath.Join(out, entries[0].Name()) + } + return stderr, stdout, runDir, cmdErr +} + +func TestCompareHappyPathWithMockBackends(t *testing.T) { + // Two mock instances stand in for two different backends. mock ignores + // cfg so we can reuse the registered type as the instance name and skip + // writing imagen.yaml entirely. 
+ stderr, stdout, runDir, err := runCompareWithEnv(t, []string{ + "a cat in a fishbowl", + "--models", "mock,mock", + "--size", "64x64", + "--seed", "42", + }) + if err != nil { + t.Fatalf("runCompare: %v\nstderr: %s", err, stderr.String()) + } + + if runDir == "" { + t.Fatal("expected a run directory under --output") + } + // Sidecar JSON + sidecar := filepath.Join(runDir, "compare.json") + data, err := os.ReadFile(sidecar) + if err != nil { + t.Fatalf("read sidecar: %v", err) + } + var body struct { + Prompt string `json:"prompt"` + Successful int `json:"successful"` + Total int `json:"total"` + Results []struct { + Backend string `json:"backend"` + ImagePath string `json:"image_path"` + Error string `json:"error"` + } `json:"results"` + } + if err := json.Unmarshal(data, &body); err != nil { + t.Fatalf("parse sidecar: %v\n%s", err, data) + } + if body.Prompt != "a cat in a fishbowl" { + t.Errorf("prompt = %q", body.Prompt) + } + if body.Total != 2 || body.Successful != 2 { + t.Errorf("counts = %d successful / %d total", body.Successful, body.Total) + } + for _, r := range body.Results { + if r.Error != "" { + t.Errorf("backend %s errored: %s", r.Backend, r.Error) + } + if _, err := os.Stat(r.ImagePath); err != nil { + t.Errorf("image not on disk for %s: %v", r.Backend, err) + } + } + + // Contact sheet path was printed on stdout. 
+ sheet := strings.TrimSpace(stdout.String()) + if sheet == "" { + t.Fatal("expected contact sheet path on stdout") + } + f, err := os.Open(sheet) + if err != nil { + t.Fatalf("open contact sheet: %v", err) + } + defer f.Close() + img, err := png.Decode(f) + if err != nil { + t.Fatalf("decode contact sheet PNG: %v", err) + } + if w := img.Bounds().Dx(); w < 100 { + t.Errorf("contact sheet looks empty (width %d)", w) + } +} + +func TestCompareSkipContactSheet(t *testing.T) { + stderr, stdout, runDir, err := runCompareWithEnv(t, []string{ + "x", + "--models", "mock", + "--size", "32x32", + "--seed", "1", + "--no-contact-sheet", + }) + if err != nil { + t.Fatalf("runCompare: %v\nstderr: %s", err, stderr.String()) + } + if got := strings.TrimSpace(stdout.String()); got != "" { + t.Errorf("expected no stdout output (no contact sheet), got %q", got) + } + if _, err := os.Stat(filepath.Join(runDir, "contact-sheet.png")); err == nil { + t.Errorf("contact-sheet.png should not exist with --no-contact-sheet") + } +} + +func TestCompareRecordsBackendErrors(t *testing.T) { + // One real (mock) + one unknown. Unknown should fail but not abort the + // run — sidecar records both, contact sheet built from successes only. 
+ stderr, _, runDir, err := runCompareWithEnv(t, []string{ + "y", + "--models", "mock,this-instance-does-not-exist", + "--size", "32x32", + }) + if err != nil { + t.Fatalf("runCompare: %v\nstderr: %s", err, stderr.String()) + } + sidecar := filepath.Join(runDir, "compare.json") + data, _ := os.ReadFile(sidecar) + var body struct { + Successful int `json:"successful"` + Total int `json:"total"` + Results []struct { + Backend string `json:"backend"` + Error string `json:"error"` + } `json:"results"` + } + if err := json.Unmarshal(data, &body); err != nil { + t.Fatalf("parse sidecar: %v", err) + } + if body.Total != 2 { + t.Errorf("expected 2 results, got %d", body.Total) + } + if body.Successful != 1 { + t.Errorf("expected 1 success, got %d", body.Successful) + } + var sawError bool + for _, r := range body.Results { + if r.Backend == "this-instance-does-not-exist" && r.Error != "" { + sawError = true + } + } + if !sawError { + t.Errorf("expected an error recorded for the unknown backend") + } +} + +func TestCompareNoModelsFails(t *testing.T) { + _, _, _, err := runCompareWithEnv(t, []string{"x"}) + if err == nil { + t.Fatal("expected error when --models is empty") + } + if !strings.Contains(err.Error(), "--models") { + t.Errorf("error should mention --models, got %v", err) + } +} + +func TestCompareNoPromptFails(t *testing.T) { + _, _, _, err := runCompareWithEnv(t, []string{"--models", "mock"}) + if err == nil { + t.Fatal("expected error when prompt is missing") + } + if !strings.Contains(err.Error(), "missing prompt") { + t.Errorf("error should mention missing prompt, got %v", err) + } +} diff --git a/cmd/imagen/main.go b/cmd/imagen/main.go index 3913e44..b39151f 100644 --- a/cmd/imagen/main.go +++ b/cmd/imagen/main.go @@ -18,6 +18,8 @@ const helpText = `imagen — model-agnostic image generation Usage: imagen generate [flags] generate one image + imagen compare --models a,b,c [flags] + run one prompt across N backends + contact sheet imagen worker [flags] consume 
the imagen.jobs queue (daemon) imagen backends list registered backend types imagen config init print a sample imagen.yaml on stdout @@ -46,6 +48,8 @@ func main() { switch os.Args[1] { case "generate": err = runGenerate(ctx, args) + case "compare": + err = runCompare(ctx, args) case "worker": err = runWorker(ctx, args) case "backends": diff --git a/docs/backends.md b/docs/backends.md new file mode 100644 index 0000000..cf1ea20 --- /dev/null +++ b/docs/backends.md @@ -0,0 +1,310 @@ +# ImaGen backends + +This document covers the local-ComfyUI backend plug-in story: how adapters +are layered, how to add a new model without touching Go, and the per-model +setup steps for the bundled templates. + +For the host-side ComfyUI install (mRock — venv, weights for the default +FLUX.1-schnell, systemd, VRAM coexistence with Ollama, smoke test against +the raw HTTP API), see [`setup-comfyui-mrock.md`](setup-comfyui-mrock.md). + +## Architecture: Path 1 — workflow-template adapter + +`imagen generate` and `imagen compare` dispatch through the `comfyui` +adapter, which holds the HTTP plumbing (`/prompt`, `/history/{id}`, `/view`, +`/system_stats`) and treats the workflow itself as data. Each backend +instance in `imagen.yaml` picks a workflow JSON via the `workflow:` key. +Adding a new model is yaml + JSON, never Go: + +``` +internal/backend/ + comfyui.go # one adapter, all ComfyUI models + workflow_template.go # loader + token-substitution + workflows/ + flux1-schnell.json # bundled templates (embedded with //go:embed) + flux2-klein.json + sd35-medium.json +``` + +### Why Path 1 over per-family adapters (`comfyui-flux.go`, `comfyui-sd3.go`…) + +- **Workflow JSON is the natural exchange format**. ComfyUI users export + workflows from its GUI as JSON. Anything else means rebuilding the graph + by hand in Go for every new model. +- **Adding a model is a config change, not a build change**. 
With Path 2, + every new family is a Go file, a new test file, a registry entry, a new + worker binary, a redeploy. Path 1 lets us land a new model with one yaml + block + one JSON file + one section in this doc. +- **The HTTP plumbing is identical across families**. `/prompt`, + `/history`, `/view`, the retry policy, the "value not in list" hint, VRAM + reporting — none of it depends on the workflow shape. Path 2 would + duplicate that across files. +- **Failure isolation stays clean**. The workflow loader fails at adapter + construction (`imagen backends` surfaces the error), the HTTP layer + fails at `Generate`, and ComfyUI's own validation surfaces missing-model + hints. Each layer's error message points at the right config knob. + +Path 2's argument was "each family owns its quirks (samplers, schedulers, +dual-stage etc.)". That argument doesn't survive contact with the +substitution-map design: per-family knobs are just key/value fields in the +yaml block and `${shift}`/`${guidance}`/`${cfg}` placeholders in the +template. No code duplication, no inheritance to debug. + +### Token substitution + +`workflow_template.SubstituteWorkflow` walks the parsed JSON and replaces +every whole-value string of the form `"${key}"` with the typed value from +the substitution map. Numbers stay numbers, strings stay strings — no +round-tripping through `strings.Replace`. + +The substitution map is built per call from: + +1. **Request fields** (always present): `${prompt}`, `${negative}`, + `${width}`, `${height}`, `${seed}`, `${steps}`, `${sampler}`, + `${scheduler}`, `${cfg}`. +2. **Every scalar field from the yaml block** (string / int / int64 / + float64 / bool), minus framework keys (`type`, `base_url`, `workflow`, + `default_*`). So `${vae}`, `${clip}`, `${clip_l}`, `${clip_t5}`, + `${dtype}`, `${shift}`, `${guidance}` all become substitutable just by + being in yaml. +3. 
**Sensible defaults** for the common optional knobs above, so a + workflow that references `${dtype}` without the user setting one in + yaml still substitutes cleanly (`fp8_e4m3fn` for FLUX, `3.0` for SD3 + shift, etc.). Extra defaults are ignored by workflows that don't + reference them. + +Partial matches (e.g. `"prefix ${prompt} suffix"`) are deliberately **not** +substituted — the placeholder must be the entire value so we can preserve +its JSON type. This prevents a prompt containing literal `${seed}` text +from corrupting the workflow. + +Unknown placeholders (referenced in JSON but missing from the substitution +map) error out before the workflow leaves the binary. + +### Back-compat + +The `workflow:` field defaults to `flux1-schnell` if omitted. Existing +yaml blocks like the pre-#10 FLUX.1-schnell instance: + +```yaml +flux-schnell-local: + type: comfyui + base_url: http://mrock:8188 + model: flux1-schnell.safetensors +``` + +still work unchanged — they implicitly pick up the migrated +`flux1-schnell.json` template, which keeps the same node IDs (6, 8, 9, 10, +11, 12, 13, 27, 30, 31) as the historical hardcoded workflow. + +## Bundled workflows + +### FLUX.1-schnell — the back-compat default + +| Field | Default | Notes | +|---|---|---| +| `model` | `flux1-schnell.safetensors` | drop in `models/unet/` | +| `vae` | `ae.safetensors` | `models/vae/` | +| `clip_l` | `clip_l.safetensors` | `models/clip/` | +| `clip_t5` | `t5xxl_fp8_e4m3fn.safetensors` | `models/clip/` | +| `dtype` | `fp8_e4m3fn` | weight dtype for the UNet loader | +| `default_steps` / `default_cfg` | 4 / 1.0 | schnell is distilled to ~4 steps | + +VRAM peak ~10–12 GB at 1024×1024. Install path: +[`setup-comfyui-mrock.md`](setup-comfyui-mrock.md). Already shipping. + +### FLUX.2 [klein] 4B — direct upgrade + +Released by Black Forest Labs late 2025 / early 2026, BFL non-commercial +license. 
The distilled 4B "klein" variant lands sub-second on the RTX +4070 Ti SUPER and shares the new Qwen-based text encoder + a re-trained +VAE with the larger family. + +```yaml +flux2-klein-local: + type: comfyui + base_url: http://mrock:8188 + workflow: flux2-klein + model: flux-2-klein-base-4b-fp8.safetensors # models/unet/ + vae: flux2-vae.safetensors # models/vae/ + clip: qwen_3_4b.safetensors # models/text_encoders/ + dtype: fp8_e4m3fn + default_steps: 4 + default_cfg: 1.0 + guidance: 4.0 +``` + +**Model downloads** (on mRock, ungated mirrors when available): + +```bash +cd ~/dev/comfyui/models +curl -L -o unet/flux-2-klein-base-4b-fp8.safetensors \ + https://huggingface.co/black-forest-labs/FLUX.2-klein/resolve/main/flux-2-klein-base-4b-fp8.safetensors +curl -L -o vae/flux2-vae.safetensors \ + https://huggingface.co/black-forest-labs/FLUX.2-klein/resolve/main/flux2-vae.safetensors +mkdir -p text_encoders +curl -L -o text_encoders/qwen_3_4b.safetensors \ + https://huggingface.co/black-forest-labs/FLUX.2-klein/resolve/main/qwen_3_4b.safetensors +``` + +BFL's primary repo is gated; if `curl` returns 401, configure an HF token +in `~/.cache/huggingface/token` or use one of the community mirrors +(check the official model card for the current list). The filenames the +template references match BFL's canonical names — rename downloads to +match if a mirror uses different ones. + +VRAM peak: ~8.5 GB (4B fp8). With Ollama parked at ~8 GB this still fits; +unlike FLUX.1-schnell, klein doesn't require stopping Ollama on mRock. + +### SD3.5-medium — single-checkpoint variant + +Stability AI's 2.5B mid-size model with bundled text encoders. The +`incl_clips_t5xxlfp8scaled` variant ships clip_g + clip_l + t5xxl_fp8 all +in one `.safetensors`, so the workflow uses `CheckpointLoaderSimple` +instead of separate UNet/VAE/CLIP loaders. 
+ +```yaml +sd35-medium-local: + type: comfyui + base_url: http://mrock:8188 + workflow: sd35-medium + model: sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors # models/checkpoints/ + default_steps: 28 + default_sampler: dpmpp_2m + default_scheduler: sgm_uniform + default_cfg: 4.5 + shift: 3.0 +``` + +**Model download** (on mRock): + +```bash +cd ~/dev/comfyui/models +curl -L -o checkpoints/sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors \ + https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/resolve/main/sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors +``` + +VRAM peak: ~9.9 GB at 1024×1024. Same envelope as FLUX.1-schnell — stop +Ollama before generating, restart after. + +## Adding a new bundled workflow + +1. **Export from ComfyUI**: load the model in the ComfyUI GUI, build a + text-to-image workflow that produces what you want, "Save (API + Format)" — the file you get is the right shape. +2. **Sprinkle placeholders**: open the JSON and replace per-call values + with `${name}` tokens. Whole-value substitution only: + + ```json + "inputs": { + "text": "${prompt}", // was "a cat sitting on a chair" + "seed": "${seed}", // was 1234567 + "steps": "${steps}", // was 28 + "cfg": "${cfg}", + "sampler_name": "${sampler}", + "scheduler": "${scheduler}", + "width": "${width}", + "height": "${height}" + } + ``` + + Use `${model}` for the checkpoint / unet filename and any per-template + knobs (`${vae}`, `${shift}`, `${guidance}`, `${clip}` …). +3. **Drop it into `internal/backend/workflows/<name>.json`**. The + `//go:embed workflows/*.json` directive in `workflow_template.go` + picks it up at build time — no registry entry needed. +4. **Add a yaml instance** in `internal/config/config.go`'s `Sample` block + for `imagen config init` (and `~/.config/imagen.yaml`) so users + discover the new backend. +5. **Document the model files + HF download URLs** in this doc. +6. **Smoke test**: `imagen generate "test" --backend <instance> + --size 1024x1024` should produce an image. 
+ +Per-call overrides: steps and seed go via `--steps` and `--seed`; sampler, +scheduler and cfg are programmatic only, via +`backend.Request.BackendOpts["sampler"]` / `["scheduler"]` / `["cfg"]`. +The compare harness forwards the constant-across-backends knobs verbatim. + +## Loading a workflow from disk (one-off) + +Pass an absolute filesystem path as `workflow:` and the adapter reads it +from disk instead of the embedded FS. Handy for prototyping a new model +before committing it: + +```yaml +my-experimental: + type: comfyui + base_url: http://mrock:8188 + workflow: /home/m/dev/comfyui/workflows/my-test.json + model: my-test-model.safetensors +``` + +The fallback chain is: filesystem path (if the string looks like a path +or ends in `.json`), then bundled lookup by name, then bundled lookup +with `.json` appended. + +## `imagen compare`: cross-backend evaluation + +```bash +imagen compare "a wizard casting a spell" \ + --models flux-schnell-local,flux2-klein-local,sd35-medium-local \ + --size 1024x1024 \ + --output ~/Pictures/imagen/compare +``` + +Per run, `compare`: + +- creates `<output>/<timestamp>-<prompt-slug>/` +- dispatches each named backend sequentially (mRock has one GPU; parallel + would OOM) — one backend's failure doesn't abort the run +- writes per-backend PNGs as `<prompt-slug>--<backend>.png` +- writes `compare.json` listing every attempt (success + failure) with + per-model `seed`, `latency_ms`, `model`, `vram_used_mib`, full + `metadata` map, and the error string for any failure +- composites a `contact-sheet.png` with the prompt as header and each + cell labelled `<backend>` / `<latency>ms · seed <seed>` + +Flags mirror `generate`: `--seed`, `--steps`, `--style`, `--negative`, +`--size` are shared across all backends. `--no-contact-sheet` skips the +composite when only the per-image PNGs and sidecar matter (e.g. for a +worker script that builds its own diff view). + +## Diagnostics + +`imagen backends` shows every instance with its registration state. 
For +local ComfyUI, the status is currently just `registered` (we don't probe +the upstream HTTP endpoint at startup — the boot-helper hint kicks in on +first generation if mRock is asleep). + +Per-backend errors come in three kinds: + +1. **Adapter construction failure** (e.g. workflow JSON not found, + missing required yaml field). Caught at `buildBackend` time: + `imagen: backend "<name>": <error>`. +2. **HTTP / runtime failure during Generate**. Wrapped with the boot + helper for `connection refused`/`no such host`/timeouts pointing at + `boot-whitetower mrock` so a sleeping mRock has an obvious next step. +3. **ComfyUI workflow-validation failure** (200-with-node_errors or 400). + Surfaces with a model-not-found hint (matching `value_not_in_list` + + `unet_name`/`ckpt_name`) when applicable, pointing back at this doc. + +## Worker daemon notes + +`imagen worker` (the `imagen.jobs` queue consumer) uses the same adapter +and workflow lookup as the synchronous CLI — flexsiebels' `/imagine` UI +INSERTs a `backend = <instance>` row, the worker claims it, and the +underlying ComfyUI HTTP calls are identical to what `generate` makes. No +worker-specific changes are required when a new backend lands; the +config + workflow are the only state that has to be present on the +worker host. + +After merging a new template or yaml block: + +```bash +# On the worker host (mRiver today): +systemctl --user restart imagen-worker +``` + +The daemon-rebuild trap from issue #9 still applies: if you build the +imagen binary on the dev machine and `scp` it over, restart the unit so +systemd picks up the new ELF. 
diff --git a/docs/usage.md b/docs/usage.md index 75585fc..a69c93c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,14 +4,21 @@ ``` imagen generate [flags] generate one image +imagen compare --models a,b,c [flags] + run one prompt across N backends + contact sheet +imagen worker [flags] consume the imagen.jobs queue (daemon) imagen backends list configured + registered backends imagen config init print a sample imagen.yaml on stdout imagen config validate parse + validate the active config imagen config path print the resolved config path imagen serve [--addr :8080] (stub) start the HTTP server +imagen usage [--since DATE] show cost-tracking rows imagen version print version ``` +For the per-backend setup (FLUX.1, FLUX.2 [klein], SD3.5 medium, …) and +the architecture rationale, see [`backends.md`](backends.md). + ## `generate` flags | Flag | Default | Notes | diff --git a/go.mod b/go.mod index 5cd50ab..1d9db0b 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.25.0 require ( github.com/jackc/pgx/v5 v5.9.2 + golang.org/x/image v0.40.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -12,5 +13,5 @@ require ( github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/kr/text v0.2.0 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect - golang.org/x/text v0.29.0 // indirect + golang.org/x/text v0.37.0 // indirect ) diff --git a/go.sum b/go.sum index 7af349d..70d691b 100644 --- a/go.sum +++ b/go.sum @@ -23,10 +23,12 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= -golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/text v0.29.0 
h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/image v0.40.0 h1:Tw4GyDXMo+daZN1znreBRC3VayR1aLFUyUEOLUdW1a8= +golang.org/x/image v0.40.0/go.mod h1:uIc348UZMSvS5Z65CVZ7iDPaNobNFEPeJ4kbqTOszmA= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/internal/backend/comfyui.go b/internal/backend/comfyui.go index 442e1e2..fdfd50e 100644 --- a/internal/backend/comfyui.go +++ b/internal/backend/comfyui.go @@ -20,24 +20,29 @@ import ( const ComfyType = "comfyui" // Comfy is the ComfyUI adapter. It speaks the public `/prompt` + `/history` -// + `/view` HTTP API and submits a fixed FLUX.1 schnell workflow built from -// the values in Request. +// + `/view` HTTP API and submits a workflow built by substituting Request +// values into a JSON template (bundled under internal/backend/workflows/ or +// loaded from a filesystem path). // // Concurrency: a single Comfy is safe to share across goroutines as long as // the underlying http.Client is. Generate does not hold long-lived state. 
type Comfy struct { instance string - base string - model string - vae string - clipL string - clipT5 string - dtype string + base string + workflow string + + // rawCfg keeps the original yaml block (minus framework keys) so we can + // expose every user-defined string/number as a workflow substitution + // without enumerating each per-model knob in Go. Empty values still get + // a substitution entry so a template can reference ${negative} when the + // request didn't pass one. + rawCfg map[string]any defaultSteps int defaultSampler string defaultScheduler string + defaultCFG float64 httpClient *http.Client pollInterval time.Duration @@ -49,12 +54,20 @@ type Comfy struct { } // NewComfy is the registry constructor. cfg is the adapter's slice of -// imagen.yaml. Required keys: base_url, model. The rest have sensible FLUX -// schnell defaults. +// imagen.yaml. +// +// Required keys: base_url, model. +// Optional keys: workflow (defaults to "flux1-schnell" for back-compat with +// existing configs), default_steps, default_sampler, default_scheduler, +// default_cfg, plus any template-specific knobs (vae, clip, clip_l, +// clip_t5, dtype, shift, guidance, …) the chosen workflow references. func NewComfy(name string, cfg map[string]any) (Backend, error) { if name == "" { return nil, fmt.Errorf("comfyui: empty instance name") } + if cfg == nil { + cfg = map[string]any{} + } base := strings.TrimRight(getString(cfg, "base_url", ""), "/") if base == "" { return nil, fmt.Errorf("comfyui[%s]: base_url is required", name) @@ -67,23 +80,27 @@ func NewComfy(name string, cfg map[string]any) (Backend, error) { return nil, fmt.Errorf("comfyui[%s]: model is required", name) } + workflow := getString(cfg, "workflow", "flux1-schnell") + // Fail fast on a bad workflow ref so users see the error at startup, + // not on first /prompt submission. 
+ if _, err := LoadWorkflowTemplate(workflow); err != nil { + return nil, fmt.Errorf("comfyui[%s]: %w", name, err) + } + c := &Comfy{ instance: name, base: base, - model: model, - - vae: getString(cfg, "vae", "ae.safetensors"), - clipL: getString(cfg, "clip_l", "clip_l.safetensors"), - clipT5: getString(cfg, "clip_t5", "t5xxl_fp8_e4m3fn.safetensors"), - dtype: getString(cfg, "weight_dtype", "fp8_e4m3fn"), + workflow: workflow, + rawCfg: cfg, defaultSteps: getInt(cfg, "default_steps", 4), defaultSampler: getString(cfg, "default_sampler", "euler"), defaultScheduler: getString(cfg, "default_scheduler", "simple"), + defaultCFG: getFloat(cfg, "default_cfg", 1.0), httpClient: &http.Client{Timeout: 60 * time.Second}, pollInterval: 250 * time.Millisecond, - pollTimeout: 120 * time.Second, + pollTimeout: 300 * time.Second, randSeed: cryptoSeed, clientIDFn: randClientID, @@ -103,19 +120,26 @@ func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) { sampler := c.defaultSampler scheduler := c.defaultScheduler + cfg := c.defaultCFG if v, ok := req.BackendOpts["sampler"].(string); ok && v != "" { sampler = v } if v, ok := req.BackendOpts["scheduler"].(string); ok && v != "" { scheduler = v } + if v, ok := req.BackendOpts["cfg"].(float64); ok && v > 0 { + cfg = v + } seed := req.Seed if seed == 0 { seed = c.randSeed() } - workflow := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler) + workflow, err := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler, cfg) + if err != nil { + return nil, fmt.Errorf("comfyui[%s]: build workflow: %w", c.instance, err) + } clientID := c.clientIDFn() start := time.Now() @@ -133,14 +157,17 @@ func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) { } latencyMs := time.Since(start).Milliseconds() + model := getString(c.rawCfg, "model", "") meta := map[string]any{ "backend": c.instance, "backend_type": ComfyType, - 
"model": c.model, + "workflow": c.workflow, + "model": model, "seed": seed, "steps": steps, "sampler": sampler, "scheduler": scheduler, + "cfg": cfg, "width": width, "height": height, "latency_ms": latencyMs, @@ -173,6 +200,7 @@ func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clien return "", fmt.Errorf("comfyui: marshal workflow: %w", err) } + model := getString(c.rawCfg, "model", "") var lastErr error for attempt := range 2 { if attempt > 0 { @@ -196,7 +224,7 @@ func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clien _ = resp.Body.Close() switch { case resp.StatusCode >= 200 && resp.StatusCode < 300: - return parsePromptID(respBody, c.model) + return parsePromptID(respBody, model) case resp.StatusCode >= 500: lastErr = fmt.Errorf("comfyui /prompt %d: %s", resp.StatusCode, snip(respBody)) continue @@ -333,98 +361,74 @@ func (c *Comfy) connError(err error) error { // workflow-validation failures and put the diagnostics in node_errors; older // builds use 200 + node_errors. This handles the 4xx flavour. func (c *Comfy) classifyBadRequest(status int, body []byte) error { - if hint, ok := missingModelHint(body, c.model); ok { - return fmt.Errorf("comfyui /prompt %d: %s — see docs/setup-comfyui-mrock.md", status, hint) + model := getString(c.rawCfg, "model", "") + if hint, ok := missingModelHint(body, model); ok { + return fmt.Errorf("comfyui /prompt %d: %s — see docs/backends.md", status, hint) } return fmt.Errorf("comfyui /prompt %d: %s", status, snip(body)) } -// buildWorkflow assembles the canonical FLUX.1 schnell ComfyUI workflow, -// node-IDs matching the upstream "flux-schnell" template so anyone debugging -// in the ComfyUI UI sees a familiar shape. 
-func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string) map[string]any { - return map[string]any{ - "6": map[string]any{ - "class_type": "CLIPTextEncode", - "inputs": map[string]any{ - "text": prompt, - "clip": []any{"11", 0}, - }, - }, - "8": map[string]any{ - "class_type": "VAEDecode", - "inputs": map[string]any{ - "samples": []any{"31", 0}, - "vae": []any{"10", 0}, - }, - }, - "9": map[string]any{ - "class_type": "SaveImage", - "inputs": map[string]any{ - "filename_prefix": "imagen", - "images": []any{"8", 0}, - }, - }, - "10": map[string]any{ - "class_type": "VAELoader", - "inputs": map[string]any{"vae_name": c.vae}, - }, - "11": map[string]any{ - "class_type": "DualCLIPLoader", - "inputs": map[string]any{ - "clip_name1": c.clipT5, - "clip_name2": c.clipL, - "type": "flux", - }, - }, - "12": map[string]any{ - "class_type": "UNETLoader", - "inputs": map[string]any{ - "unet_name": c.model, - "weight_dtype": c.dtype, - }, - }, - "13": map[string]any{ - "class_type": "CLIPTextEncode", - "inputs": map[string]any{ - "text": negative, - "clip": []any{"11", 0}, - }, - }, - "27": map[string]any{ - "class_type": "EmptySD3LatentImage", - "inputs": map[string]any{ - "width": w, - "height": h, - "batch_size": 1, - }, - }, - "30": map[string]any{ - "class_type": "ModelSamplingFlux", - "inputs": map[string]any{ - "model": []any{"12", 0}, - "max_shift": 1.15, - "base_shift": 0.5, - "width": w, - "height": h, - }, - }, - "31": map[string]any{ - "class_type": "KSampler", - "inputs": map[string]any{ - "model": []any{"30", 0}, - "seed": seed, - "steps": steps, - "cfg": 1.0, - "sampler_name": sampler, - "scheduler": scheduler, - "denoise": 1.0, - "positive": []any{"6", 0}, - "negative": []any{"13", 0}, - "latent_image": []any{"27", 0}, - }, - }, +// buildWorkflow loads the configured workflow template and substitutes the +// per-call placeholders (prompt, seed, sampler, …) plus any string/number +// fields the user defined 
in the yaml block. The set of placeholder keys +// that aren't in `subs` produces an error from SubstituteWorkflow. +func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string, cfg float64) (map[string]any, error) { + wf, err := LoadWorkflowTemplate(c.workflow) + if err != nil { + return nil, err } + subs := map[string]any{ + "prompt": prompt, + "negative": negative, + "width": w, + "height": h, + "seed": seed, + "steps": steps, + "sampler": sampler, + "scheduler": scheduler, + "cfg": cfg, + } + // Surface every scalar field from the yaml block so per-template knobs + // (vae, clip, clip_l, clip_t5, dtype, shift, guidance, …) work without + // adapter-code changes. Framework keys are excluded. + for k, v := range c.rawCfg { + switch k { + case "type", "base_url", "workflow", + "default_steps", "default_sampler", "default_scheduler", "default_cfg": + continue + } + if _, alreadySet := subs[k]; alreadySet { + // A per-call var (e.g. ${prompt}) beats anything yaml put under + // the same key — yaml can't shadow request-derived values. + continue + } + switch v := v.(type) { + case string, int, int64, float64, bool: + subs[k] = v + } + } + // Provide sensible defaults for common optional knobs so a workflow that + // references one of these doesn't fail substitution when the user + // didn't override it in yaml. Extra keys are ignored if the workflow + // doesn't reference them, so it's safe to always set the lot. + defaults := map[string]any{ + "vae": "ae.safetensors", + "clip_l": "clip_l.safetensors", + "clip_t5": "t5xxl_fp8_e4m3fn.safetensors", + "clip": "qwen_3_4b.safetensors", + "dtype": "fp8_e4m3fn", + "guidance": 4.0, + "shift": 3.0, + } + for k, v := range defaults { + if _, ok := subs[k]; !ok { + subs[k] = v + } + } + if _, err := SubstituteWorkflow(wf, subs); err != nil { + return nil, err + } + return wf, nil } // parsePromptID handles the 2xx /prompt response. 
ComfyUI sometimes 200s a @@ -432,8 +436,8 @@ func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, ste // turns that into the same user-facing error as a 4xx with the same body. func parsePromptID(body []byte, model string) (string, error) { var resp struct { - PromptID string `json:"prompt_id"` - NodeErrors map[string]any `json:"node_errors"` + PromptID string `json:"prompt_id"` + NodeErrors map[string]any `json:"node_errors"` Error json.RawMessage `json:"error"` } if err := json.Unmarshal(body, &resp); err != nil { @@ -441,7 +445,7 @@ func parsePromptID(body []byte, model string) (string, error) { } if len(resp.NodeErrors) > 0 || len(resp.Error) > 0 { if hint, ok := missingModelHint(body, model); ok { - return "", fmt.Errorf("comfyui /prompt: %s — see docs/setup-comfyui-mrock.md", hint) + return "", fmt.Errorf("comfyui /prompt: %s — see docs/backends.md", hint) } return "", fmt.Errorf("comfyui /prompt rejected workflow: %s", snip(body)) } @@ -489,15 +493,21 @@ func parseHistory(body []byte, promptID string) (string, bool, error) { } // missingModelHint returns a user-actionable message when the response body -// indicates the configured unet model isn't loaded on the server. ComfyUI -// uses both the human-readable "Value not in list" message and the enum -// "value_not_in_list" type — match either. +// indicates the configured unet/checkpoint model isn't loaded on the server. +// ComfyUI uses both the human-readable "Value not in list" message and the +// enum "value_not_in_list" type — match either. 
// missingModelHint inspects a ComfyUI response body and, when it reports a
// rejected enum value for the model loader input, returns a user-actionable
// message naming the configured model. The rejection marker is matched in
// both spellings ComfyUI uses: the prose "Value not in list" and the type
// tag "value_not_in_list". The second result is false when the body carries
// no such marker or the marker does not concern unet_name / ckpt_name.
func missingModelHint(body []byte, model string) (string, bool) {
	text := string(body)
	if !strings.Contains(text, "Value not in list") && !strings.Contains(text, "value_not_in_list") {
		return "", false
	}
	switch {
	case strings.Contains(text, "unet_name"):
		return fmt.Sprintf("model %q not present in the ComfyUI server's models/unet/", model), true
	case strings.Contains(text, "ckpt_name"):
		return fmt.Sprintf("checkpoint %q not present in the ComfyUI server's models/checkpoints/", model), true
	}
	return "", false
}

// getFloat reads m[k] as a float64. Values of type float64, float32, int and
// int64 are converted; a missing key or a value of any other type yields def.
func getFloat(m map[string]any, k string, def float64) float64 {
	raw, present := m[k]
	if !present {
		return def
	}
	switch num := raw.(type) {
	case float64:
		return num
	case float32:
		return float64(num)
	case int:
		return float64(num)
	case int64:
		return float64(num)
	default:
		return def
	}
}
index 0000000..806e655 --- /dev/null +++ b/internal/backend/workflow_template.go @@ -0,0 +1,156 @@ +package backend + +import ( + "embed" + "encoding/json" + "fmt" + "io/fs" + "maps" + "os" + "path/filepath" + "regexp" + "sort" + "strings" +) + +//go:embed workflows/*.json +var bundledWorkflows embed.FS + +// placeholderRE matches a single-token placeholder like "${prompt}" — the +// whole string value must be the placeholder, leading/trailing whitespace +// allowed. This lets us preserve types (a numeric substitution becomes a +// JSON number, not a stringified one) instead of round-tripping through +// strings.Replace which would force everything into a string. +var placeholderRE = regexp.MustCompile(`^\s*\$\{([a-zA-Z][a-zA-Z0-9_]*)\}\s*$`) + +// LoadWorkflowTemplate returns the parsed JSON for a workflow template. +// `name` is resolved in this order: +// +// 1. exact filesystem path that exists on disk (absolute or relative); +// 2. one of the bundled templates under internal/backend/workflows/ +// (with or without the .json suffix). +// +// The returned map is a fresh deep copy of the template; callers can mutate +// it freely. +func LoadWorkflowTemplate(name string) (map[string]any, error) { + if name == "" { + return nil, fmt.Errorf("workflow template name is empty") + } + raw, err := readWorkflowBytes(name) + if err != nil { + return nil, err + } + var wf map[string]any + if err := json.Unmarshal(raw, &wf); err != nil { + return nil, fmt.Errorf("workflow %s: parse: %w", name, err) + } + return wf, nil +} + +// BundledWorkflowNames returns the names of templates compiled into the +// binary, sorted. Each name is the basename without the .json suffix. 
+func BundledWorkflowNames() []string { + entries, err := fs.ReadDir(bundledWorkflows, "workflows") + if err != nil { + return nil + } + out := make([]string, 0, len(entries)) + for _, e := range entries { + n := e.Name() + if !strings.HasSuffix(n, ".json") { + continue + } + out = append(out, strings.TrimSuffix(n, ".json")) + } + sort.Strings(out) + return out +} + +func readWorkflowBytes(name string) ([]byte, error) { + // Filesystem path wins if it points at a real file. Lets a user override + // a bundled template by passing an absolute path in yaml. + if strings.ContainsRune(name, os.PathSeparator) || strings.HasSuffix(name, ".json") { + if b, err := os.ReadFile(name); err == nil { + return b, nil + } else if !os.IsNotExist(err) { + return nil, fmt.Errorf("workflow %s: %w", name, err) + } + } + // Bundled lookup. Try the literal name as a file inside workflows/, then + // with the .json suffix appended. + candidates := []string{ + filepath.Join("workflows", name), + filepath.Join("workflows", name+".json"), + } + for _, c := range candidates { + if b, err := bundledWorkflows.ReadFile(c); err == nil { + return b, nil + } + } + return nil, fmt.Errorf("workflow %q not found (bundled templates: %v)", name, BundledWorkflowNames()) +} + +// SubstituteWorkflow walks wf and replaces every "${key}" string with the +// matching value from subs, preserving JSON types. Returns the set of +// placeholder keys it actually touched, so the caller can detect missing +// substitutions even when a key is defined in subs but never referenced in +// the workflow (typical when a yaml block sets a knob a different template +// would consume). +// +// Unknown placeholders (referenced in the workflow but absent from subs) +// produce an error so we never submit a workflow with raw "${foo}" tokens. 
+func SubstituteWorkflow(wf map[string]any, subs map[string]any) (used map[string]struct{}, err error) { + used = make(map[string]struct{}) + walked, err := substituteValue(wf, subs, used) + if err != nil { + return nil, err + } + // substituteValue returns the replacement for the top-level value, which + // should still be the same map (just with mutated children). + if m, ok := walked.(map[string]any); ok { + // Copy back into wf so the caller's reference reflects the result. + for k := range wf { + delete(wf, k) + } + maps.Copy(wf, m) + } + return used, nil +} + +func substituteValue(v any, subs map[string]any, used map[string]struct{}) (any, error) { + switch x := v.(type) { + case map[string]any: + out := make(map[string]any, len(x)) + for k, child := range x { + replaced, err := substituteValue(child, subs, used) + if err != nil { + return nil, err + } + out[k] = replaced + } + return out, nil + case []any: + out := make([]any, len(x)) + for i, child := range x { + replaced, err := substituteValue(child, subs, used) + if err != nil { + return nil, err + } + out[i] = replaced + } + return out, nil + case string: + if m := placeholderRE.FindStringSubmatch(x); m != nil { + key := m[1] + val, ok := subs[key] + if !ok { + return nil, fmt.Errorf("workflow placeholder ${%s} has no substitution", key) + } + used[key] = struct{}{} + return val, nil + } + return x, nil + default: + return v, nil + } +} diff --git a/internal/backend/workflow_template_test.go b/internal/backend/workflow_template_test.go new file mode 100644 index 0000000..dbd8ffe --- /dev/null +++ b/internal/backend/workflow_template_test.go @@ -0,0 +1,153 @@ +package backend + +import ( + "os" + "path/filepath" + "slices" + "strings" + "testing" +) + +func TestBundledWorkflowsParseable(t *testing.T) { + names := BundledWorkflowNames() + if len(names) == 0 { + t.Fatal("expected at least one bundled workflow") + } + mustHave := []string{"flux1-schnell", "flux2-klein", "sd35-medium"} + for _, want := range 
mustHave { + if !slices.Contains(names, want) { + t.Errorf("bundled workflows missing %q (have: %v)", want, names) + } + } + // Every bundled template must parse and contain at least one node. + for _, n := range names { + wf, err := LoadWorkflowTemplate(n) + if err != nil { + t.Errorf("LoadWorkflowTemplate(%q): %v", n, err) + continue + } + if len(wf) == 0 { + t.Errorf("workflow %q has zero nodes", n) + } + } +} + +func TestLoadWorkflowFromFilesystem(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "custom.json") + body := `{"1":{"class_type":"X","inputs":{"v":"${prompt}"}}}` + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatalf("write tmp workflow: %v", err) + } + wf, err := LoadWorkflowTemplate(path) + if err != nil { + t.Fatalf("load from path: %v", err) + } + if _, ok := wf["1"]; !ok { + t.Errorf("custom workflow missing node 1") + } +} + +func TestLoadWorkflowUnknownNameErrors(t *testing.T) { + _, err := LoadWorkflowTemplate("definitely-not-a-real-workflow") + if err == nil { + t.Fatal("expected error for unknown workflow name") + } + if !strings.Contains(err.Error(), "not found") { + t.Errorf("error should say not found, got %v", err) + } +} + +func TestSubstituteWorkflowPreservesTypes(t *testing.T) { + wf := map[string]any{ + "31": map[string]any{ + "class_type": "KSampler", + "inputs": map[string]any{ + "seed": "${seed}", + "steps": "${steps}", + "text": "${prompt}", + "cfg": "${cfg}", + }, + }, + } + subs := map[string]any{ + "seed": int64(42), + "steps": 11, + "prompt": "a cat", + "cfg": 4.5, + } + used, err := SubstituteWorkflow(wf, subs) + if err != nil { + t.Fatalf("Substitute: %v", err) + } + if len(used) != 4 { + t.Errorf("used = %v, want all four", used) + } + inputs := wf["31"].(map[string]any)["inputs"].(map[string]any) + if seed, ok := inputs["seed"].(int64); !ok || seed != 42 { + t.Errorf("seed = %T %v, want int64 42", inputs["seed"], inputs["seed"]) + } + if steps, ok := inputs["steps"].(int); !ok || 
steps != 11 { + t.Errorf("steps = %T %v, want int 11", inputs["steps"], inputs["steps"]) + } + if text, ok := inputs["text"].(string); !ok || text != "a cat" { + t.Errorf("text = %T %v, want string", inputs["text"], inputs["text"]) + } + if cfg, ok := inputs["cfg"].(float64); !ok || cfg != 4.5 { + t.Errorf("cfg = %T %v, want float64 4.5", inputs["cfg"], inputs["cfg"]) + } +} + +func TestSubstituteWorkflowMissingPlaceholderErrors(t *testing.T) { + wf := map[string]any{ + "1": map[string]any{"inputs": map[string]any{"v": "${missing}"}}, + } + _, err := SubstituteWorkflow(wf, map[string]any{}) + if err == nil { + t.Fatal("expected error for missing placeholder") + } + if !strings.Contains(err.Error(), "${missing}") { + t.Errorf("error should name the placeholder, got %v", err) + } +} + +func TestSubstituteWorkflowOnlyWholeTokens(t *testing.T) { + // Partial-match strings ("prefix ${prompt} suffix") are NOT substituted — + // the placeholder must be the whole value so we can preserve types. + wf := map[string]any{ + "1": map[string]any{"inputs": map[string]any{ + "keep_string": "stuff with ${prompt} inside", + "replace_full": "${prompt}", + }}, + } + used, err := SubstituteWorkflow(wf, map[string]any{"prompt": "x"}) + if err != nil { + t.Fatalf("Substitute: %v", err) + } + inputs := wf["1"].(map[string]any)["inputs"].(map[string]any) + if inputs["keep_string"].(string) != "stuff with ${prompt} inside" { + t.Errorf("partial match should be left alone, got %q", inputs["keep_string"]) + } + if inputs["replace_full"].(string) != "x" { + t.Errorf("full-value match should substitute, got %q", inputs["replace_full"]) + } + if _, ok := used["prompt"]; !ok { + t.Errorf("used should track keys that fired") + } +} + +func TestFlux1SchnellTemplateMatchesLegacyShape(t *testing.T) { + // Regression guard against the historical hardcoded workflow: every + // node ID the old Comfy.buildWorkflow used must still exist in the + // migrated template. 
+ wf, err := LoadWorkflowTemplate("flux1-schnell") + if err != nil { + t.Fatalf("load flux1-schnell: %v", err) + } + legacyNodes := []string{"6", "8", "9", "10", "11", "12", "13", "27", "30", "31"} + for _, id := range legacyNodes { + if _, ok := wf[id]; !ok { + t.Errorf("flux1-schnell template missing node %q (legacy parity)", id) + } + } +} diff --git a/internal/backend/workflows/flux1-schnell.json b/internal/backend/workflows/flux1-schnell.json new file mode 100644 index 0000000..a137d4e --- /dev/null +++ b/internal/backend/workflows/flux1-schnell.json @@ -0,0 +1,84 @@ +{ + "6": { + "class_type": "CLIPTextEncode", + "inputs": { + "text": "${prompt}", + "clip": ["11", 0] + } + }, + "8": { + "class_type": "VAEDecode", + "inputs": { + "samples": ["31", 0], + "vae": ["10", 0] + } + }, + "9": { + "class_type": "SaveImage", + "inputs": { + "filename_prefix": "imagen", + "images": ["8", 0] + } + }, + "10": { + "class_type": "VAELoader", + "inputs": { + "vae_name": "${vae}" + } + }, + "11": { + "class_type": "DualCLIPLoader", + "inputs": { + "clip_name1": "${clip_t5}", + "clip_name2": "${clip_l}", + "type": "flux" + } + }, + "12": { + "class_type": "UNETLoader", + "inputs": { + "unet_name": "${model}", + "weight_dtype": "${dtype}" + } + }, + "13": { + "class_type": "CLIPTextEncode", + "inputs": { + "text": "${negative}", + "clip": ["11", 0] + } + }, + "27": { + "class_type": "EmptySD3LatentImage", + "inputs": { + "width": "${width}", + "height": "${height}", + "batch_size": 1 + } + }, + "30": { + "class_type": "ModelSamplingFlux", + "inputs": { + "model": ["12", 0], + "max_shift": 1.15, + "base_shift": 0.5, + "width": "${width}", + "height": "${height}" + } + }, + "31": { + "class_type": "KSampler", + "inputs": { + "model": ["30", 0], + "seed": "${seed}", + "steps": "${steps}", + "cfg": "${cfg}", + "sampler_name": "${sampler}", + "scheduler": "${scheduler}", + "denoise": 1.0, + "positive": ["6", 0], + "negative": ["13", 0], + "latent_image": ["27", 0] + } + } +} diff 
--git a/internal/backend/workflows/flux2-klein.json b/internal/backend/workflows/flux2-klein.json new file mode 100644 index 0000000..1c97701 --- /dev/null +++ b/internal/backend/workflows/flux2-klein.json @@ -0,0 +1,79 @@ +{ + "6": { + "class_type": "CLIPTextEncode", + "inputs": { + "text": "${prompt}", + "clip": ["11", 0] + } + }, + "8": { + "class_type": "VAEDecode", + "inputs": { + "samples": ["31", 0], + "vae": ["10", 0] + } + }, + "9": { + "class_type": "SaveImage", + "inputs": { + "filename_prefix": "imagen", + "images": ["8", 0] + } + }, + "10": { + "class_type": "VAELoader", + "inputs": { + "vae_name": "${vae}" + } + }, + "11": { + "class_type": "CLIPLoader", + "inputs": { + "clip_name": "${clip}", + "type": "flux2" + } + }, + "12": { + "class_type": "UNETLoader", + "inputs": { + "unet_name": "${model}", + "weight_dtype": "${dtype}" + } + }, + "14": { + "class_type": "FluxGuidance", + "inputs": { + "conditioning": ["6", 0], + "guidance": "${guidance}" + } + }, + "15": { + "class_type": "ConditioningZeroOut", + "inputs": { + "conditioning": ["6", 0] + } + }, + "27": { + "class_type": "EmptyFlux2LatentImage", + "inputs": { + "width": "${width}", + "height": "${height}", + "batch_size": 1 + } + }, + "31": { + "class_type": "KSampler", + "inputs": { + "model": ["12", 0], + "seed": "${seed}", + "steps": "${steps}", + "cfg": "${cfg}", + "sampler_name": "${sampler}", + "scheduler": "${scheduler}", + "denoise": 1.0, + "positive": ["14", 0], + "negative": ["15", 0], + "latent_image": ["27", 0] + } + } +} diff --git a/internal/backend/workflows/sd35-medium.json b/internal/backend/workflows/sd35-medium.json new file mode 100644 index 0000000..feb78a5 --- /dev/null +++ b/internal/backend/workflows/sd35-medium.json @@ -0,0 +1,66 @@ +{ + "4": { + "class_type": "CheckpointLoaderSimple", + "inputs": { + "ckpt_name": "${model}" + } + }, + "6": { + "class_type": "CLIPTextEncode", + "inputs": { + "text": "${prompt}", + "clip": ["4", 1] + } + }, + "7": { + "class_type": 
"CLIPTextEncode", + "inputs": { + "text": "${negative}", + "clip": ["4", 1] + } + }, + "8": { + "class_type": "VAEDecode", + "inputs": { + "samples": ["31", 0], + "vae": ["4", 2] + } + }, + "9": { + "class_type": "SaveImage", + "inputs": { + "filename_prefix": "imagen", + "images": ["8", 0] + } + }, + "13": { + "class_type": "ModelSamplingSD3", + "inputs": { + "model": ["4", 0], + "shift": "${shift}" + } + }, + "27": { + "class_type": "EmptySD3LatentImage", + "inputs": { + "width": "${width}", + "height": "${height}", + "batch_size": 1 + } + }, + "31": { + "class_type": "KSampler", + "inputs": { + "model": ["13", 0], + "seed": "${seed}", + "steps": "${steps}", + "cfg": "${cfg}", + "sampler_name": "${sampler}", + "scheduler": "${scheduler}", + "denoise": 1.0, + "positive": ["6", 0], + "negative": ["7", 0], + "latent_image": ["27", 0] + } + } +} diff --git a/internal/config/config.go b/internal/config/config.go index a8a809e..4d4f236 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -144,15 +144,54 @@ output: cloud_sync: auto backends: + # FLUX.1-schnell on the local ComfyUI server. The "workflow" key picks the + # bundled template under internal/backend/workflows/; omit it for back-compat + # (defaults to flux1-schnell). See docs/backends.md for the per-model setup. flux-schnell-local: type: comfyui base_url: http://mrock:8188 + workflow: flux1-schnell # Filename of the unet checkpoint inside the ComfyUI server's - # models/unet/ directory. See docs/setup-comfyui-mrock.md. + # models/unet/ directory. model: flux1-schnell.safetensors + vae: ae.safetensors + clip_l: clip_l.safetensors + clip_t5: t5xxl_fp8_e4m3fn.safetensors + dtype: fp8_e4m3fn default_steps: 4 default_sampler: euler default_scheduler: simple + default_cfg: 1.0 + + # FLUX.2 [klein] 4B distilled — sub-second on RTX 4070 Ti SUPER. + # Weights: BFL non-commercial; flux-2-klein-base-4b-fp8 in models/unet/, + # qwen_3_4b in models/text_encoders/, flux2-vae in models/vae/. 
+ flux2-klein-local: + type: comfyui + base_url: http://mrock:8188 + workflow: flux2-klein + model: flux-2-klein-base-4b-fp8.safetensors + vae: flux2-vae.safetensors + clip: qwen_3_4b.safetensors + dtype: fp8_e4m3fn + default_steps: 4 + default_sampler: euler + default_scheduler: simple + default_cfg: 1.0 + guidance: 4.0 + + # SD3.5 medium — single-checkpoint variant that bundles the three text + # encoders inside the .safetensors. Drop into models/checkpoints/. + sd35-medium-local: + type: comfyui + base_url: http://mrock:8188 + workflow: sd35-medium + model: sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors + default_steps: 28 + default_sampler: dpmpp_2m + default_scheduler: sgm_uniform + default_cfg: 4.5 + shift: 3.0 mock: type: mock