Merge mai/hermes/issue-10-multi-model: multi-model backend expansion + compare harness (#10)
This commit is contained in:
386
cmd/imagen/compare.go
Normal file
386
cmd/imagen/compare.go
Normal file
@@ -0,0 +1,386 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"image"
|
||||||
|
"image/color"
|
||||||
|
"image/draw"
|
||||||
|
"image/png"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/image/font"
|
||||||
|
"golang.org/x/image/font/basicfont"
|
||||||
|
"golang.org/x/image/math/fixed"
|
||||||
|
|
||||||
|
"mgit.msbls.de/m/ImaGen/internal/backend"
|
||||||
|
"mgit.msbls.de/m/ImaGen/internal/config"
|
||||||
|
"mgit.msbls.de/m/ImaGen/internal/output"
|
||||||
|
"mgit.msbls.de/m/ImaGen/internal/prompt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// runCompare implements `imagen compare "<prompt>" --models a,b,c --output <dir>`.
|
||||||
|
//
|
||||||
|
// Each backend in --models runs sequentially against the same prompt (mRock
|
||||||
|
// has a single GPU; parallelising would just OOM). Each generation lands as
|
||||||
|
// a backend-suffixed file in the output dir; a contact sheet stitches them
|
||||||
|
// together into one PNG with the backend name overlaid on each cell. A
|
||||||
|
// sidecar JSON next to the contact sheet lists every generation with its
|
||||||
|
// per-model metadata (latency, seed, model file, VRAM peak).
|
||||||
|
func runCompare(ctx context.Context, args []string) error {
|
||||||
|
fs := flag.NewFlagSet("compare", flag.ContinueOnError)
|
||||||
|
var (
|
||||||
|
modelsCSV string
|
||||||
|
size string
|
||||||
|
outDir string
|
||||||
|
style string
|
||||||
|
negative string
|
||||||
|
seed int64
|
||||||
|
steps int
|
||||||
|
configPath string
|
||||||
|
noContact bool
|
||||||
|
)
|
||||||
|
fs.StringVar(&modelsCSV, "models", "", "comma-separated backend instance names (required)")
|
||||||
|
fs.StringVar(&size, "size", "1024x1024", "WxH for every backend")
|
||||||
|
fs.StringVar(&outDir, "output", "", "directory to write the images + contact sheet (default: ~/Pictures/imagen/compare)")
|
||||||
|
fs.StringVar(&style, "style", "", "style preset applied to the prompt before dispatching to each backend")
|
||||||
|
fs.StringVar(&negative, "negative", "", "negative prompt (forwarded to every backend that supports it)")
|
||||||
|
fs.Int64Var(&seed, "seed", 0, "deterministic seed for every backend (0 = each backend rolls its own)")
|
||||||
|
fs.IntVar(&steps, "steps", 0, "diffusion steps (0 = each backend's default)")
|
||||||
|
fs.StringVar(&configPath, "config", "", "config file path (default: ~/.config/imagen.yaml)")
|
||||||
|
fs.BoolVar(&noContact, "no-contact-sheet", false, "skip the composite PNG; only write per-backend images + sidecar")
|
||||||
|
fs.Usage = func() {
|
||||||
|
fmt.Fprintln(fs.Output(), `Usage: imagen compare "<prompt>" --models a,b,c [flags]`)
|
||||||
|
fs.PrintDefaults()
|
||||||
|
}
|
||||||
|
leadingPositional, flagArgs := splitLeadingPositional(args)
|
||||||
|
if err := fs.Parse(flagArgs); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
positional := append(leadingPositional, fs.Args()...)
|
||||||
|
if len(positional) == 0 {
|
||||||
|
fs.Usage()
|
||||||
|
return userErr("missing prompt")
|
||||||
|
}
|
||||||
|
rawPrompt := strings.Join(positional, " ")
|
||||||
|
modelNames := splitCSV(modelsCSV)
|
||||||
|
if len(modelNames) == 0 {
|
||||||
|
return userErr("--models is required (comma-separated backend instance names)")
|
||||||
|
}
|
||||||
|
|
||||||
|
w, h, err := parseSize(size)
|
||||||
|
if err != nil {
|
||||||
|
return userErr("bad --size: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, cfgErr := config.Load(configPath)
|
||||||
|
if cfgErr != nil && !os.IsNotExist(cfgErr) {
|
||||||
|
return cfgErr
|
||||||
|
}
|
||||||
|
|
||||||
|
if outDir == "" {
|
||||||
|
home, _ := os.UserHomeDir()
|
||||||
|
outDir = filepath.Join(home, "Pictures", "imagen", "compare")
|
||||||
|
}
|
||||||
|
outDir = config.ExpandPath(outDir)
|
||||||
|
|
||||||
|
finalPrompt, err := prompt.Apply(rawPrompt, style)
|
||||||
|
if err != nil {
|
||||||
|
return userErr("%v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
runID := time.Now().Format("20060102-150405")
|
||||||
|
runDir := filepath.Join(outDir, runID+"-"+output.Slug(rawPrompt))
|
||||||
|
if err := os.MkdirAll(runDir, 0o755); err != nil {
|
||||||
|
return fmt.Errorf("mkdir %s: %w", runDir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]compareResult, 0, len(modelNames))
|
||||||
|
for i, name := range modelNames {
|
||||||
|
fmt.Fprintf(os.Stderr, "[%d/%d] %s ...\n", i+1, len(modelNames), name)
|
||||||
|
res, err := generateOne(ctx, cfg, name, finalPrompt, negative, w, h, seed, steps, runDir, rawPrompt)
|
||||||
|
if err != nil {
|
||||||
|
// Don't abort the whole run on a single backend failure — record
|
||||||
|
// the error and continue. flexsiebels-style consumers want to
|
||||||
|
// see N-1 results rather than zero when one model is offline.
|
||||||
|
fmt.Fprintf(os.Stderr, " failed: %v\n", err)
|
||||||
|
results = append(results, compareResult{Backend: name, Error: err.Error()})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, " %s (%d ms)\n", res.ImagePath, res.LatencyMs)
|
||||||
|
results = append(results, res)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sidecar JSON beside the run dir captures every attempt.
|
||||||
|
sidecar := filepath.Join(runDir, "compare.json")
|
||||||
|
if err := writeCompareSidecar(sidecar, rawPrompt, style, negative, w, h, seed, steps, results); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fmt.Fprintln(os.Stderr, "sidecar:", sidecar)
|
||||||
|
|
||||||
|
// Contact sheet stitches the successful results together. If every
|
||||||
|
// backend failed there's nothing to draw, so skip silently.
|
||||||
|
if !noContact {
|
||||||
|
successes := successfulResults(results)
|
||||||
|
if len(successes) > 0 {
|
||||||
|
sheet := filepath.Join(runDir, "contact-sheet.png")
|
||||||
|
if err := writeContactSheet(sheet, rawPrompt, successes); err != nil {
|
||||||
|
return fmt.Errorf("contact sheet: %w", err)
|
||||||
|
}
|
||||||
|
fmt.Println(sheet)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintln(os.Stderr, "imagen compare: all backends failed; no contact sheet written")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// compareResult describes what a single backend produced during a
// comparison run and is serialised as one entry of the sidecar's "results"
// array. For a failed backend only Backend and Error are populated.
type compareResult struct {
	// Backend is the instance name as given in --models.
	Backend string `json:"backend"`
	// ImagePath is where the generated image was written; empty on failure.
	ImagePath string `json:"image_path,omitempty"`
	// Seed is always emitted, even when zero.
	Seed int64 `json:"seed"`
	// LatencyMs is taken from the backend metadata key "latency_ms".
	LatencyMs int64 `json:"latency_ms,omitempty"`
	// Model is taken from the backend metadata key "model".
	Model string `json:"model,omitempty"`
	// VRAMUsedMiB is taken from the backend metadata key "vram_used_mib".
	VRAMUsedMiB int64 `json:"vram_used_mib,omitempty"`
	// Metadata is the backend's raw metadata map, passed through as-is.
	Metadata map[string]any `json:"metadata,omitempty"`
	// Error holds the failure message; empty when generation succeeded.
	Error string `json:"error,omitempty"`
}
|
||||||
|
|
||||||
|
func generateOne(ctx context.Context, cfg *config.Config, name, finalPrompt, negative string, w, h int, seed int64, steps int, runDir, rawPrompt string) (compareResult, error) {
|
||||||
|
be, err := buildBackend(cfg, name)
|
||||||
|
if err != nil {
|
||||||
|
return compareResult{Backend: name}, err
|
||||||
|
}
|
||||||
|
attachUsageSink(be)
|
||||||
|
|
||||||
|
req := backend.Request{
|
||||||
|
Prompt: finalPrompt,
|
||||||
|
NegativePrompt: negative,
|
||||||
|
Width: w,
|
||||||
|
Height: h,
|
||||||
|
Steps: steps,
|
||||||
|
Seed: seed,
|
||||||
|
}
|
||||||
|
res, err := be.Generate(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return compareResult{Backend: name}, err
|
||||||
|
}
|
||||||
|
defer res.ImageReader.Close()
|
||||||
|
|
||||||
|
imgBytes, err := io.ReadAll(res.ImageReader)
|
||||||
|
if err != nil {
|
||||||
|
return compareResult{Backend: name}, fmt.Errorf("read image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
imgPath := filepath.Join(runDir, output.Slug(rawPrompt)+"--"+output.Slug(name)+"."+extFromMime(res.MimeType))
|
||||||
|
if err := os.WriteFile(imgPath, imgBytes, 0o644); err != nil {
|
||||||
|
return compareResult{Backend: name}, fmt.Errorf("write %s: %w", imgPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cr := compareResult{
|
||||||
|
Backend: name,
|
||||||
|
ImagePath: imgPath,
|
||||||
|
Seed: seedFromMetadata(res.Metadata, seed),
|
||||||
|
LatencyMs: metaInt64(res.Metadata, "latency_ms"),
|
||||||
|
Model: metaString(res.Metadata, "model"),
|
||||||
|
Metadata: res.Metadata,
|
||||||
|
}
|
||||||
|
if v, ok := res.Metadata["vram_used_mib"].(int64); ok {
|
||||||
|
cr.VRAMUsedMiB = v
|
||||||
|
}
|
||||||
|
return cr, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func successfulResults(rs []compareResult) []compareResult {
|
||||||
|
out := make([]compareResult, 0, len(rs))
|
||||||
|
for _, r := range rs {
|
||||||
|
if r.Error == "" && r.ImagePath != "" {
|
||||||
|
out = append(out, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeCompareSidecar(path, rawPrompt, style, negative string, w, h int, seed int64, steps int, results []compareResult) error {
|
||||||
|
body := map[string]any{
|
||||||
|
"timestamp": time.Now().UTC().Format(time.RFC3339),
|
||||||
|
"prompt": rawPrompt,
|
||||||
|
"style": style,
|
||||||
|
"negative": negative,
|
||||||
|
"width": w,
|
||||||
|
"height": h,
|
||||||
|
"seed": seed,
|
||||||
|
"steps": steps,
|
||||||
|
"results": results,
|
||||||
|
"backends": backendNames(results),
|
||||||
|
"successful": len(successfulResults(results)),
|
||||||
|
"total": len(results),
|
||||||
|
}
|
||||||
|
data, err := json.MarshalIndent(body, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal sidecar: %w", err)
|
||||||
|
}
|
||||||
|
return os.WriteFile(path, append(data, '\n'), 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func backendNames(rs []compareResult) []string {
|
||||||
|
out := make([]string, len(rs))
|
||||||
|
for i, r := range rs {
|
||||||
|
out[i] = r.Backend
|
||||||
|
}
|
||||||
|
sort.Strings(out)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeContactSheet stitches a grid of (image, label) cells into one PNG.
|
||||||
|
// Cells are sized to fit in a target width of ~2400px while keeping each
|
||||||
|
// individual image full-resolution (no downscale) up to the column limit;
|
||||||
|
// past that, images sit at their native size and we just lay them out.
|
||||||
|
//
|
||||||
|
// The grid is a simple horizontal row when N <= 4; otherwise N/2 rows of 2.
|
||||||
|
// This is a contact sheet, not a fancy gallery — readability for side-by-
|
||||||
|
// side eyeballing is the goal.
|
||||||
|
func writeContactSheet(path, prompt string, results []compareResult) error {
|
||||||
|
if len(results) == 0 {
|
||||||
|
return fmt.Errorf("no successful results to lay out")
|
||||||
|
}
|
||||||
|
cells := make([]contactCell, 0, len(results))
|
||||||
|
for _, r := range results {
|
||||||
|
img, err := readPNG(r.ImagePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read %s: %w", r.ImagePath, err)
|
||||||
|
}
|
||||||
|
cells = append(cells, contactCell{
|
||||||
|
Image: img,
|
||||||
|
Label: r.Backend,
|
||||||
|
SubLabel: fmt.Sprintf("%dms · seed %d", r.LatencyMs, r.Seed),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
cols := len(cells)
|
||||||
|
if cols > 4 {
|
||||||
|
cols = 2
|
||||||
|
}
|
||||||
|
rows := (len(cells) + cols - 1) / cols
|
||||||
|
|
||||||
|
const labelH = 64
|
||||||
|
const pad = 16
|
||||||
|
|
||||||
|
cellW := cells[0].Image.Bounds().Dx()
|
||||||
|
cellH := cells[0].Image.Bounds().Dy()
|
||||||
|
for _, c := range cells {
|
||||||
|
if w := c.Image.Bounds().Dx(); w > cellW {
|
||||||
|
cellW = w
|
||||||
|
}
|
||||||
|
if h := c.Image.Bounds().Dy(); h > cellH {
|
||||||
|
cellH = h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
totalW := cols*cellW + (cols+1)*pad
|
||||||
|
totalH := rows*(cellH+labelH) + (rows+1)*pad + 48 // header band
|
||||||
|
|
||||||
|
canvas := image.NewRGBA(image.Rect(0, 0, totalW, totalH))
|
||||||
|
draw.Draw(canvas, canvas.Bounds(), &image.Uniform{C: color.RGBA{R: 30, G: 30, B: 35, A: 255}}, image.Point{}, draw.Src)
|
||||||
|
|
||||||
|
// Header: show the truncated prompt.
|
||||||
|
headerText := "imagen compare — " + truncate(prompt, 100)
|
||||||
|
drawText(canvas, headerText, pad, 30, color.RGBA{R: 240, G: 240, B: 245, A: 255})
|
||||||
|
|
||||||
|
for i, c := range cells {
|
||||||
|
col := i % cols
|
||||||
|
row := i / cols
|
||||||
|
x0 := pad + col*(cellW+pad)
|
||||||
|
y0 := 48 + pad + row*(cellH+labelH+pad)
|
||||||
|
// Center the image inside the cell when smaller than the max cell size.
|
||||||
|
iw := c.Image.Bounds().Dx()
|
||||||
|
ih := c.Image.Bounds().Dy()
|
||||||
|
offX := (cellW - iw) / 2
|
||||||
|
offY := (cellH - ih) / 2
|
||||||
|
dstRect := image.Rect(x0+offX, y0+offY, x0+offX+iw, y0+offY+ih)
|
||||||
|
draw.Draw(canvas, dstRect, c.Image, c.Image.Bounds().Min, draw.Src)
|
||||||
|
|
||||||
|
// Label band underneath.
|
||||||
|
labelY := y0 + cellH + 20
|
||||||
|
drawText(canvas, c.Label, x0+8, labelY, color.RGBA{R: 250, G: 250, B: 250, A: 255})
|
||||||
|
drawText(canvas, c.SubLabel, x0+8, labelY+22, color.RGBA{R: 180, G: 180, B: 190, A: 255})
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Create(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("create %s: %w", path, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
return png.Encode(f, canvas)
|
||||||
|
}
|
||||||
|
|
||||||
|
// contactCell is one tile of the contact sheet: a decoded image plus the two
// lines of text drawn beneath it.
type contactCell struct {
	// Image is the decoded picture, drawn at its native size.
	Image image.Image
	// Label is the first text line (the backend name).
	Label string
	// SubLabel is the second text line (latency and seed).
	SubLabel string
}
|
||||||
|
|
||||||
|
// readPNG opens the file at path and decodes it with image.Decode, returning
// the decoded image. The open error or the decode error is returned as-is.
func readPNG(path string) (image.Image, error) {
	src, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer src.Close()

	decoded, _, decodeErr := image.Decode(src)
	return decoded, decodeErr
}
|
||||||
|
|
||||||
|
func drawText(dst *image.RGBA, s string, x, y int, c color.Color) {
|
||||||
|
drawer := &font.Drawer{
|
||||||
|
Dst: dst,
|
||||||
|
Src: &image.Uniform{C: c},
|
||||||
|
Face: basicfont.Face7x13,
|
||||||
|
Dot: fixed.Point26_6{X: fixed.I(x), Y: fixed.I(y)},
|
||||||
|
}
|
||||||
|
drawer.DrawString(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// truncate shortens s to at most max characters (runes). A string that
// already fits is returned unchanged; otherwise the first max-1 characters
// are kept and "…" is appended, so the result is exactly max characters long.
//
// Length is counted in runes rather than bytes so a multi-byte character is
// never cut in half. A non-positive max yields the empty string.
func truncate(s string, max int) string {
	if max <= 0 {
		return ""
	}
	runes := []rune(s)
	if len(runes) <= max {
		return s
	}
	return string(runes[:max-1]) + "…"
}
|
||||||
|
|
||||||
|
// splitCSV breaks s on commas, trims surrounding whitespace from each piece,
// and drops pieces that end up empty. The result is never nil.
func splitCSV(s string) []string {
	fields := strings.Split(s, ",")
	kept := make([]string, 0, len(fields))
	for i := range fields {
		if trimmed := strings.TrimSpace(fields[i]); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	return kept
}
|
||||||
|
|
||||||
|
// metaInt64 looks up key in m and returns its value as an int64. Values of
// type int64, int and float64 are converted (float64 by truncation); a
// missing key or a value of any other type yields 0.
func metaInt64(m map[string]any, key string) int64 {
	// Indexing a missing key yields a nil interface, which matches no case.
	switch n := m[key].(type) {
	case float64:
		return int64(n)
	case int:
		return int64(n)
	case int64:
		return n
	default:
		return 0
	}
}
|
||||||
203
cmd/imagen/compare_test.go
Normal file
203
cmd/imagen/compare_test.go
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"image/png"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// runCompareWithEnv runs the compare subcommand in a writable tmpdir, with
|
||||||
|
// XDG_CONFIG_HOME pointing somewhere empty so no host imagen.yaml leaks in.
|
||||||
|
func runCompareWithEnv(t *testing.T, args []string) (stderr, stdout *bytes.Buffer, runDir string, err error) {
|
||||||
|
t.Helper()
|
||||||
|
tmp := t.TempDir()
|
||||||
|
t.Setenv("XDG_CONFIG_HOME", filepath.Join(tmp, "no-config"))
|
||||||
|
t.Setenv("HOME", tmp)
|
||||||
|
|
||||||
|
out := filepath.Join(tmp, "compare")
|
||||||
|
// stdlib flag parsing requires flags after the leading positional. Append
|
||||||
|
// --output at the end so any caller-supplied flags still parse cleanly.
|
||||||
|
args = append(args, "--output", out)
|
||||||
|
|
||||||
|
// Capture stdout/stderr via os pipes since runCompare writes directly.
|
||||||
|
oldStdout := os.Stdout
|
||||||
|
oldStderr := os.Stderr
|
||||||
|
rOut, wOut, _ := os.Pipe()
|
||||||
|
rErr, wErr, _ := os.Pipe()
|
||||||
|
os.Stdout = wOut
|
||||||
|
os.Stderr = wErr
|
||||||
|
defer func() {
|
||||||
|
os.Stdout = oldStdout
|
||||||
|
os.Stderr = oldStderr
|
||||||
|
}()
|
||||||
|
|
||||||
|
cmdErr := runCompare(context.Background(), args)
|
||||||
|
|
||||||
|
_ = wOut.Close()
|
||||||
|
_ = wErr.Close()
|
||||||
|
stdout = &bytes.Buffer{}
|
||||||
|
stderr = &bytes.Buffer{}
|
||||||
|
_, _ = stdout.ReadFrom(rOut)
|
||||||
|
_, _ = stderr.ReadFrom(rErr)
|
||||||
|
|
||||||
|
entries, _ := os.ReadDir(out)
|
||||||
|
if len(entries) == 1 {
|
||||||
|
runDir = filepath.Join(out, entries[0].Name())
|
||||||
|
}
|
||||||
|
return stderr, stdout, runDir, cmdErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareHappyPathWithMockBackends(t *testing.T) {
|
||||||
|
// Two mock instances stand in for two different backends. mock ignores
|
||||||
|
// cfg so we can reuse the registered type as the instance name and skip
|
||||||
|
// writing imagen.yaml entirely.
|
||||||
|
stderr, stdout, runDir, err := runCompareWithEnv(t, []string{
|
||||||
|
"a cat in a fishbowl",
|
||||||
|
"--models", "mock,mock",
|
||||||
|
"--size", "64x64",
|
||||||
|
"--seed", "42",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runCompare: %v\nstderr: %s", err, stderr.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
if runDir == "" {
|
||||||
|
t.Fatal("expected a run directory under --output")
|
||||||
|
}
|
||||||
|
// Sidecar JSON
|
||||||
|
sidecar := filepath.Join(runDir, "compare.json")
|
||||||
|
data, err := os.ReadFile(sidecar)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read sidecar: %v", err)
|
||||||
|
}
|
||||||
|
var body struct {
|
||||||
|
Prompt string `json:"prompt"`
|
||||||
|
Successful int `json:"successful"`
|
||||||
|
Total int `json:"total"`
|
||||||
|
Results []struct {
|
||||||
|
Backend string `json:"backend"`
|
||||||
|
ImagePath string `json:"image_path"`
|
||||||
|
Error string `json:"error"`
|
||||||
|
} `json:"results"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &body); err != nil {
|
||||||
|
t.Fatalf("parse sidecar: %v\n%s", err, data)
|
||||||
|
}
|
||||||
|
if body.Prompt != "a cat in a fishbowl" {
|
||||||
|
t.Errorf("prompt = %q", body.Prompt)
|
||||||
|
}
|
||||||
|
if body.Total != 2 || body.Successful != 2 {
|
||||||
|
t.Errorf("counts = %d successful / %d total", body.Successful, body.Total)
|
||||||
|
}
|
||||||
|
for _, r := range body.Results {
|
||||||
|
if r.Error != "" {
|
||||||
|
t.Errorf("backend %s errored: %s", r.Backend, r.Error)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(r.ImagePath); err != nil {
|
||||||
|
t.Errorf("image not on disk for %s: %v", r.Backend, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contact sheet path was printed on stdout.
|
||||||
|
sheet := strings.TrimSpace(stdout.String())
|
||||||
|
if sheet == "" {
|
||||||
|
t.Fatal("expected contact sheet path on stdout")
|
||||||
|
}
|
||||||
|
f, err := os.Open(sheet)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open contact sheet: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
img, err := png.Decode(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("decode contact sheet PNG: %v", err)
|
||||||
|
}
|
||||||
|
if w := img.Bounds().Dx(); w < 100 {
|
||||||
|
t.Errorf("contact sheet looks empty (width %d)", w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareSkipContactSheet(t *testing.T) {
|
||||||
|
stderr, stdout, runDir, err := runCompareWithEnv(t, []string{
|
||||||
|
"x",
|
||||||
|
"--models", "mock",
|
||||||
|
"--size", "32x32",
|
||||||
|
"--seed", "1",
|
||||||
|
"--no-contact-sheet",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runCompare: %v\nstderr: %s", err, stderr.String())
|
||||||
|
}
|
||||||
|
if got := strings.TrimSpace(stdout.String()); got != "" {
|
||||||
|
t.Errorf("expected no stdout output (no contact sheet), got %q", got)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(filepath.Join(runDir, "contact-sheet.png")); err == nil {
|
||||||
|
t.Errorf("contact-sheet.png should not exist with --no-contact-sheet")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareRecordsBackendErrors(t *testing.T) {
|
||||||
|
// One real (mock) + one unknown. Unknown should fail but not abort the
|
||||||
|
// run — sidecar records both, contact sheet built from successes only.
|
||||||
|
stderr, _, runDir, err := runCompareWithEnv(t, []string{
|
||||||
|
"y",
|
||||||
|
"--models", "mock,this-instance-does-not-exist",
|
||||||
|
"--size", "32x32",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runCompare: %v\nstderr: %s", err, stderr.String())
|
||||||
|
}
|
||||||
|
sidecar := filepath.Join(runDir, "compare.json")
|
||||||
|
data, _ := os.ReadFile(sidecar)
|
||||||
|
var body struct {
|
||||||
|
Successful int `json:"successful"`
|
||||||
|
Total int `json:"total"`
|
||||||
|
Results []struct {
|
||||||
|
Backend string `json:"backend"`
|
||||||
|
Error string `json:"error"`
|
||||||
|
} `json:"results"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &body); err != nil {
|
||||||
|
t.Fatalf("parse sidecar: %v", err)
|
||||||
|
}
|
||||||
|
if body.Total != 2 {
|
||||||
|
t.Errorf("expected 2 results, got %d", body.Total)
|
||||||
|
}
|
||||||
|
if body.Successful != 1 {
|
||||||
|
t.Errorf("expected 1 success, got %d", body.Successful)
|
||||||
|
}
|
||||||
|
var sawError bool
|
||||||
|
for _, r := range body.Results {
|
||||||
|
if r.Backend == "this-instance-does-not-exist" && r.Error != "" {
|
||||||
|
sawError = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !sawError {
|
||||||
|
t.Errorf("expected an error recorded for the unknown backend")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareNoModelsFails(t *testing.T) {
|
||||||
|
_, _, _, err := runCompareWithEnv(t, []string{"x"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error when --models is empty")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "--models") {
|
||||||
|
t.Errorf("error should mention --models, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareNoPromptFails(t *testing.T) {
|
||||||
|
_, _, _, err := runCompareWithEnv(t, []string{"--models", "mock"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error when prompt is missing")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "missing prompt") {
|
||||||
|
t.Errorf("error should mention missing prompt, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,6 +18,8 @@ const helpText = `imagen — model-agnostic image generation
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
imagen generate <prompt> [flags] generate one image
|
imagen generate <prompt> [flags] generate one image
|
||||||
|
imagen compare <prompt> --models a,b,c [flags]
|
||||||
|
run one prompt across N backends + contact sheet
|
||||||
imagen worker [flags] consume the imagen.jobs queue (daemon)
|
imagen worker [flags] consume the imagen.jobs queue (daemon)
|
||||||
imagen backends list registered backend types
|
imagen backends list registered backend types
|
||||||
imagen config init print a sample imagen.yaml on stdout
|
imagen config init print a sample imagen.yaml on stdout
|
||||||
@@ -46,6 +48,8 @@ func main() {
|
|||||||
switch os.Args[1] {
|
switch os.Args[1] {
|
||||||
case "generate":
|
case "generate":
|
||||||
err = runGenerate(ctx, args)
|
err = runGenerate(ctx, args)
|
||||||
|
case "compare":
|
||||||
|
err = runCompare(ctx, args)
|
||||||
case "worker":
|
case "worker":
|
||||||
err = runWorker(ctx, args)
|
err = runWorker(ctx, args)
|
||||||
case "backends":
|
case "backends":
|
||||||
|
|||||||
310
docs/backends.md
Normal file
310
docs/backends.md
Normal file
@@ -0,0 +1,310 @@
|
|||||||
|
# ImaGen backends
|
||||||
|
|
||||||
|
This document covers the local-ComfyUI backend plug-in story: how adapters
|
||||||
|
are layered, how to add a new model without touching Go, and the per-model
|
||||||
|
setup steps for the bundled templates.
|
||||||
|
|
||||||
|
For the host-side ComfyUI install (mRock — venv, weights for the default
|
||||||
|
FLUX.1-schnell, systemd, VRAM coexistence with Ollama, smoke test against
|
||||||
|
the raw HTTP API), see [`setup-comfyui-mrock.md`](setup-comfyui-mrock.md).
|
||||||
|
|
||||||
|
## Architecture: Path 1 — workflow-template adapter
|
||||||
|
|
||||||
|
`imagen generate` and `imagen compare` dispatch through the `comfyui`
|
||||||
|
adapter, which holds the HTTP plumbing (`/prompt`, `/history/{id}`, `/view`,
|
||||||
|
`/system_stats`) and treats the workflow itself as data. Each backend
|
||||||
|
instance in `imagen.yaml` picks a workflow JSON via the `workflow:` key.
|
||||||
|
Adding a new model is yaml + JSON, never Go:
|
||||||
|
|
||||||
|
```
|
||||||
|
internal/backend/
|
||||||
|
comfyui.go # one adapter, all ComfyUI models
|
||||||
|
workflow_template.go # loader + token-substitution
|
||||||
|
workflows/
|
||||||
|
flux1-schnell.json # bundled templates (embedded with //go:embed)
|
||||||
|
flux2-klein.json
|
||||||
|
sd35-medium.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Why Path 1 over per-family adapters (`comfyui-flux.go`, `comfyui-sd3.go`…)
|
||||||
|
|
||||||
|
- **Workflow JSON is the natural exchange format**. ComfyUI users export
|
||||||
|
workflows from its GUI as JSON. Anything else means rebuilding the graph
|
||||||
|
by hand in Go for every new model.
|
||||||
|
- **Adding a model is a config change, not a build change**. With Path 2,
|
||||||
|
every new family is a Go file, a new test file, a registry entry, a new
|
||||||
|
worker binary, a redeploy. Path 1 lets us land a new model with one yaml
|
||||||
|
block + one JSON file + one section in this doc.
|
||||||
|
- **The HTTP plumbing is identical across families**. `/prompt`,
|
||||||
|
`/history`, `/view`, the retry policy, the "value not in list" hint, VRAM
|
||||||
|
reporting — none of it depends on the workflow shape. Path 2 would
|
||||||
|
duplicate that across files.
|
||||||
|
- **Failure isolation stays clean**. The workflow loader fails at adapter
|
||||||
|
construction (`imagen backends` surfaces the error), the HTTP layer
|
||||||
|
fails at `Generate`, and ComfyUI's own validation surfaces missing-model
|
||||||
|
hints. Each layer's error message points at the right config knob.
|
||||||
|
|
||||||
|
Path 2's argument was "each family owns its quirks (samplers, schedulers,
|
||||||
|
dual-stage etc.)". That argument doesn't survive contact with the
|
||||||
|
substitution-map design: per-family knobs are just key/value fields in the
|
||||||
|
yaml block and `${shift}`/`${guidance}`/`${cfg}` placeholders in the
|
||||||
|
template. No code duplication, no inheritance to debug.
|
||||||
|
|
||||||
|
### Token substitution
|
||||||
|
|
||||||
|
`workflow_template.SubstituteWorkflow` walks the parsed JSON and replaces
|
||||||
|
every whole-value string of the form `"${key}"` with the typed value from
|
||||||
|
the substitution map. Numbers stay numbers, strings stay strings — no
|
||||||
|
round-tripping through `strings.Replace`.
|
||||||
|
|
||||||
|
The substitution map is built per call from:
|
||||||
|
|
||||||
|
1. **Request fields** (always present): `${prompt}`, `${negative}`,
|
||||||
|
`${width}`, `${height}`, `${seed}`, `${steps}`, `${sampler}`,
|
||||||
|
`${scheduler}`, `${cfg}`.
|
||||||
|
2. **Every scalar field from the yaml block** (string / int / int64 /
|
||||||
|
float64 / bool), minus framework keys (`type`, `base_url`, `workflow`,
|
||||||
|
`default_*`). So `${vae}`, `${clip}`, `${clip_l}`, `${clip_t5}`,
|
||||||
|
`${dtype}`, `${shift}`, `${guidance}` all become substitutable just by
|
||||||
|
being in yaml.
|
||||||
|
3. **Sensible defaults** for the common optional knobs above, so a
|
||||||
|
workflow that references `${dtype}` without the user setting one in
|
||||||
|
yaml still substitutes cleanly (`fp8_e4m3fn` for FLUX, `3.0` for SD3
|
||||||
|
shift, etc.). Extra defaults are ignored by workflows that don't
|
||||||
|
reference them.
|
||||||
|
|
||||||
|
Partial matches (e.g. `"prefix ${prompt} suffix"`) are deliberately **not**
|
||||||
|
substituted — the placeholder must be the entire value so we can preserve
|
||||||
|
its JSON type. This prevents a prompt containing literal `${seed}` text
|
||||||
|
from corrupting the workflow.
|
||||||
|
|
||||||
|
Unknown placeholders (referenced in JSON but missing from the substitution
|
||||||
|
map) error out before the workflow leaves the binary.
|
||||||
|
|
||||||
|
### Back-compat
|
||||||
|
|
||||||
|
The `workflow:` field defaults to `flux1-schnell` if omitted. Existing
|
||||||
|
yaml blocks like the pre-#10 FLUX.1-schnell instance:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
flux-schnell-local:
|
||||||
|
type: comfyui
|
||||||
|
base_url: http://mrock:8188
|
||||||
|
model: flux1-schnell.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
still work unchanged — they implicitly pick up the migrated
|
||||||
|
`flux1-schnell.json` template, which keeps the same node IDs (6, 8, 9, 10,
|
||||||
|
11, 12, 13, 27, 30, 31) as the historical hardcoded workflow.
|
||||||
|
|
||||||
|
## Bundled workflows
|
||||||
|
|
||||||
|
### FLUX.1-schnell — the back-compat default
|
||||||
|
|
||||||
|
| Field | Default | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| `model` | `flux1-schnell.safetensors` | drop in `models/unet/` |
|
||||||
|
| `vae` | `ae.safetensors` | `models/vae/` |
|
||||||
|
| `clip_l` | `clip_l.safetensors` | `models/clip/` |
|
||||||
|
| `clip_t5` | `t5xxl_fp8_e4m3fn.safetensors` | `models/clip/` |
|
||||||
|
| `dtype` | `fp8_e4m3fn` | weight dtype for the UNet loader |
|
||||||
|
| `default_steps` / `default_cfg` | 4 / 1.0 | schnell is distilled to ~4 steps |
|
||||||
|
|
||||||
|
VRAM peak ~10–12 GB at 1024×1024. Install path:
|
||||||
|
[`setup-comfyui-mrock.md`](setup-comfyui-mrock.md). Already shipping.
|
||||||
|
|
||||||
|
### FLUX.2 [klein] 4B — direct upgrade
|
||||||
|
|
||||||
|
Released by Black Forest Labs in late 2025 / early 2026 under the BFL
non-commercial license. The distilled 4B "klein" variant lands sub-second on the RTX
|
||||||
|
4070 Ti SUPER and shares the new Qwen-based text encoder + a re-trained
|
||||||
|
VAE with the larger family.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
flux2-klein-local:
|
||||||
|
type: comfyui
|
||||||
|
base_url: http://mrock:8188
|
||||||
|
workflow: flux2-klein
|
||||||
|
model: flux-2-klein-base-4b-fp8.safetensors # models/unet/
|
||||||
|
vae: flux2-vae.safetensors # models/vae/
|
||||||
|
clip: qwen_3_4b.safetensors # models/text_encoders/
|
||||||
|
dtype: fp8_e4m3fn
|
||||||
|
default_steps: 4
|
||||||
|
default_cfg: 1.0
|
||||||
|
guidance: 4.0
|
||||||
|
```
|
||||||
|
|
||||||
|
**Model downloads** (on mRock, ungated mirrors when available):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/dev/comfyui/models
|
||||||
|
curl -L -o unet/flux-2-klein-base-4b-fp8.safetensors \
|
||||||
|
https://huggingface.co/black-forest-labs/FLUX.2-klein/resolve/main/flux-2-klein-base-4b-fp8.safetensors
|
||||||
|
curl -L -o vae/flux2-vae.safetensors \
|
||||||
|
https://huggingface.co/black-forest-labs/FLUX.2-klein/resolve/main/flux2-vae.safetensors
|
||||||
|
mkdir -p text_encoders
|
||||||
|
curl -L -o text_encoders/qwen_3_4b.safetensors \
|
||||||
|
https://huggingface.co/black-forest-labs/FLUX.2-klein/resolve/main/qwen_3_4b.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
BFL's primary repo is gated; if `curl` returns 401, configure an HF token
|
||||||
|
in `~/.cache/huggingface/token` or use one of the community mirrors
|
||||||
|
(check the official model card for the current list). The filenames the
|
||||||
|
template references match BFL's canonical names — rename downloads to
|
||||||
|
match if a mirror uses different ones.
|
||||||
|
|
||||||
|
VRAM peak: ~8.5 GB (4B fp8). With Ollama parked at ~8 GB this still fits;
|
||||||
|
unlike FLUX.1-schnell, klein doesn't require stopping Ollama on mRock.
|
||||||
|
|
||||||
|
### SD3.5-medium — single-checkpoint variant
|
||||||
|
|
||||||
|
Stability AI's 2.5B mid-size model with bundled text encoders. The
|
||||||
|
`incl_clips_t5xxlfp8scaled` variant ships clip_g + clip_l + t5xxl_fp8 all
|
||||||
|
in one `.safetensors`, so the workflow uses `CheckpointLoaderSimple`
|
||||||
|
instead of separate UNet/VAE/CLIP loaders.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
sd35-medium-local:
|
||||||
|
type: comfyui
|
||||||
|
base_url: http://mrock:8188
|
||||||
|
workflow: sd35-medium
|
||||||
|
model: sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors # models/checkpoints/
|
||||||
|
default_steps: 28
|
||||||
|
default_sampler: dpmpp_2m
|
||||||
|
default_scheduler: sgm_uniform
|
||||||
|
default_cfg: 4.5
|
||||||
|
shift: 3.0
|
||||||
|
```
|
||||||
|
|
||||||
|
**Model download** (on mRock):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/dev/comfyui/models
|
||||||
|
curl -L -o checkpoints/sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors \
|
||||||
|
https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/resolve/main/sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
VRAM peak: ~9.9 GB at 1024×1024. Same envelope as FLUX.1-schnell — stop
|
||||||
|
Ollama before generating, restart after.
|
||||||
|
|
||||||
|
## Adding a new bundled workflow
|
||||||
|
|
||||||
|
1. **Export from ComfyUI**: load the model in the ComfyUI GUI, build a
|
||||||
|
text-to-image workflow that produces what you want, "Save (API
|
||||||
|
Format)" — the file you get is the right shape.
|
||||||
|
2. **Sprinkle placeholders**: open the JSON and replace per-call values
|
||||||
|
with `${name}` tokens. Whole-value substitution only:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"inputs": {
|
||||||
|
"text": "${prompt}", // was "a cat sitting on a chair"
|
||||||
|
"seed": "${seed}", // was 1234567
|
||||||
|
"steps": "${steps}", // was 28
|
||||||
|
"cfg": "${cfg}",
|
||||||
|
"sampler_name": "${sampler}",
|
||||||
|
"scheduler": "${scheduler}",
|
||||||
|
"width": "${width}",
|
||||||
|
"height": "${height}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Use `${model}` for the checkpoint / unet filename and any per-template
|
||||||
|
knobs (`${vae}`, `${shift}`, `${guidance}`, `${clip}` …).
|
||||||
|
3. **Drop it into `internal/backend/workflows/<name>.json`**. The
|
||||||
|
`//go:embed workflows/*.json` directive in `workflow_template.go`
|
||||||
|
picks it up at build time — no registry entry needed.
|
||||||
|
4. **Add a yaml instance** in `internal/config/config.go`'s `Sample` block
|
||||||
|
for `imagen config init` (and `~/.config/imagen.yaml`) so users
|
||||||
|
discover the new backend.
|
||||||
|
5. **Document the model files + HF download URLs** in this doc.
|
||||||
|
6. **Smoke test**: `imagen generate "test" --backend <new-instance>
|
||||||
|
--size 1024x1024` should produce an image.
|
||||||
|
|
||||||
|
Per-call overrides go via the CLI flags `--steps` and `--seed`,
|
||||||
|
and (programmatic) `backend.Request.BackendOpts["sampler"]` /
|
||||||
|
`["scheduler"]` / `["cfg"]`. The compare harness forwards the
|
||||||
|
constant-across-backends knobs verbatim.
|
||||||
|
|
||||||
|
## Loading a workflow from disk (one-off)
|
||||||
|
|
||||||
|
Pass an absolute filesystem path as `workflow:` and the adapter reads it
|
||||||
|
from disk instead of the embedded FS. Handy for prototyping a new model
|
||||||
|
before committing it:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
my-experimental:
|
||||||
|
type: comfyui
|
||||||
|
base_url: http://mrock:8188
|
||||||
|
workflow: /home/m/dev/comfyui/workflows/my-test.json
|
||||||
|
model: my-test-model.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
The fallback chain is: filesystem path (if the string looks like a path
|
||||||
|
or ends in `.json`), then bundled lookup by name, then bundled lookup
|
||||||
|
with `.json` appended.
|
||||||
|
|
||||||
|
## `imagen compare`: cross-backend evaluation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
imagen compare "a wizard casting a spell" \
|
||||||
|
--models flux-schnell-local,flux2-klein-local,sd35-medium-local \
|
||||||
|
--size 1024x1024 \
|
||||||
|
--output ~/Pictures/imagen/compare
|
||||||
|
```
|
||||||
|
|
||||||
|
Per run, `compare`:
|
||||||
|
|
||||||
|
- creates `<output>/<YYYYMMDD-HHMMSS>-<prompt-slug>/`
|
||||||
|
- dispatches each named backend sequentially (mRock has one GPU; parallel
|
||||||
|
would OOM) — one backend's failure doesn't abort the run
|
||||||
|
- writes per-backend PNGs as `<prompt-slug>--<backend-slug>.png`
|
||||||
|
- writes `compare.json` listing every attempt (success + failure) with
|
||||||
|
per-model `seed`, `latency_ms`, `model`, `vram_used_mib`, full
|
||||||
|
`metadata` map, and the error string for any failure
|
||||||
|
- composites a `contact-sheet.png` with the prompt as header and each
|
||||||
|
cell labelled `<backend>` / `<latency>ms · seed <n>`
|
||||||
|
|
||||||
|
Flags mirror `generate`: `--seed`, `--steps`, `--style`, `--negative`,
|
||||||
|
`--size` are shared across all backends. `--no-contact-sheet` skips the
|
||||||
|
composite when only the per-image PNGs and sidecar matter (e.g. for a
|
||||||
|
worker script that builds its own diff view).
|
||||||
|
|
||||||
|
## Diagnostics
|
||||||
|
|
||||||
|
`imagen backends` shows every instance with its registration state. For
|
||||||
|
local ComfyUI, the status is currently just `registered` (we don't probe
|
||||||
|
the upstream HTTP endpoint at startup — the boot-helper hint kicks in on
|
||||||
|
first generation if mRock is asleep).
|
||||||
|
|
||||||
|
Per-backend errors fall into three kinds:
|
||||||
|
|
||||||
|
1. **Adapter construction failure** (e.g. workflow JSON not found,
|
||||||
|
missing required yaml field). Caught at `buildBackend` time:
|
||||||
|
`imagen: backend "<name>": <err>`.
|
||||||
|
2. **HTTP / runtime failure during Generate**. Wrapped with the boot
|
||||||
|
helper for `connection refused`/`no such host`/timeouts pointing at
|
||||||
|
`boot-whitetower mrock` so a sleeping mRock has an obvious next step.
|
||||||
|
3. **ComfyUI workflow-validation failure** (200-with-node_errors or 400).
|
||||||
|
Surfaces with a model-not-found hint (matching `value_not_in_list` +
|
||||||
|
`unet_name`/`ckpt_name`) when applicable, pointing back at this doc.
|
||||||
|
|
||||||
|
## Worker daemon notes
|
||||||
|
|
||||||
|
`imagen worker` (the `imagen.jobs` queue consumer) uses the same adapter
|
||||||
|
+ workflow lookup as the synchronous CLI — flexsiebels' `/imagine` UI
|
||||||
|
INSERTs a `backend = <instance>` row, the worker claims it, and the
|
||||||
|
underlying ComfyUI HTTP calls are identical to what `generate` makes. No
|
||||||
|
worker-specific changes are required when a new backend lands; the
|
||||||
|
config + workflow are the only state that has to be present on the
|
||||||
|
worker host.
|
||||||
|
|
||||||
|
After merging a new template or yaml block:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On the worker host (mRiver today):
|
||||||
|
systemctl --user restart imagen-worker
|
||||||
|
```
|
||||||
|
|
||||||
|
The daemon-rebuild trap from issue #9 still applies: if you build the
|
||||||
|
imagen binary on the dev machine and `scp` it over, restart the unit so
|
||||||
|
systemd picks up the new ELF.
|
||||||
@@ -4,14 +4,21 @@
|
|||||||
|
|
||||||
```
|
```
|
||||||
imagen generate <prompt> [flags] generate one image
|
imagen generate <prompt> [flags] generate one image
|
||||||
|
imagen compare <prompt> --models a,b,c [flags]
|
||||||
|
run one prompt across N backends + contact sheet
|
||||||
|
imagen worker [flags] consume the imagen.jobs queue (daemon)
|
||||||
imagen backends list configured + registered backends
|
imagen backends list configured + registered backends
|
||||||
imagen config init print a sample imagen.yaml on stdout
|
imagen config init print a sample imagen.yaml on stdout
|
||||||
imagen config validate parse + validate the active config
|
imagen config validate parse + validate the active config
|
||||||
imagen config path print the resolved config path
|
imagen config path print the resolved config path
|
||||||
imagen serve [--addr :8080] (stub) start the HTTP server
|
imagen serve [--addr :8080] (stub) start the HTTP server
|
||||||
|
imagen usage [--since DATE] show cost-tracking rows
|
||||||
imagen version print version
|
imagen version print version
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For the per-backend setup (FLUX.1, FLUX.2 [klein], SD3.5 medium, …) and
|
||||||
|
the architecture rationale, see [`backends.md`](backends.md).
|
||||||
|
|
||||||
## `generate` flags
|
## `generate` flags
|
||||||
|
|
||||||
| Flag | Default | Notes |
|
| Flag | Default | Notes |
|
||||||
|
|||||||
3
go.mod
3
go.mod
@@ -4,6 +4,7 @@ go 1.25.0
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/jackc/pgx/v5 v5.9.2
|
github.com/jackc/pgx/v5 v5.9.2
|
||||||
|
golang.org/x/image v0.40.0
|
||||||
gopkg.in/yaml.v3 v3.0.1
|
gopkg.in/yaml.v3 v3.0.1
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -12,5 +13,5 @@ require (
|
|||||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||||
github.com/kr/text v0.2.0 // indirect
|
github.com/kr/text v0.2.0 // indirect
|
||||||
github.com/rogpeppe/go-internal v1.14.1 // indirect
|
github.com/rogpeppe/go-internal v1.14.1 // indirect
|
||||||
golang.org/x/text v0.29.0 // indirect
|
golang.org/x/text v0.37.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
10
go.sum
10
go.sum
@@ -23,10 +23,12 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
|
|||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
golang.org/x/image v0.40.0 h1:Tw4GyDXMo+daZN1znreBRC3VayR1aLFUyUEOLUdW1a8=
|
||||||
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
golang.org/x/image v0.40.0/go.mod h1:uIc348UZMSvS5Z65CVZ7iDPaNobNFEPeJ4kbqTOszmA=
|
||||||
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
|
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||||
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
|
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||||
|
golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
|
||||||
|
golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||||
|
|||||||
@@ -20,8 +20,9 @@ import (
|
|||||||
const ComfyType = "comfyui"
|
const ComfyType = "comfyui"
|
||||||
|
|
||||||
// Comfy is the ComfyUI adapter. It speaks the public `/prompt` + `/history`
|
// Comfy is the ComfyUI adapter. It speaks the public `/prompt` + `/history`
|
||||||
// + `/view` HTTP API and submits a fixed FLUX.1 schnell workflow built from
|
// + `/view` HTTP API and submits a workflow built by substituting Request
|
||||||
// the values in Request.
|
// values into a JSON template (bundled under internal/backend/workflows/ or
|
||||||
|
// loaded from a filesystem path).
|
||||||
//
|
//
|
||||||
// Concurrency: a single Comfy is safe to share across goroutines as long as
|
// Concurrency: a single Comfy is safe to share across goroutines as long as
|
||||||
// the underlying http.Client is. Generate does not hold long-lived state.
|
// the underlying http.Client is. Generate does not hold long-lived state.
|
||||||
@@ -29,15 +30,19 @@ type Comfy struct {
|
|||||||
instance string
|
instance string
|
||||||
|
|
||||||
base string
|
base string
|
||||||
model string
|
workflow string
|
||||||
vae string
|
|
||||||
clipL string
|
// rawCfg keeps the original yaml block (framework keys are skipped later) so we can
|
||||||
clipT5 string
|
// expose every user-defined string/number as a workflow substitution
|
||||||
dtype string
|
// without enumerating each per-model knob in Go. Empty values still get
|
||||||
|
// a substitution entry so a template can reference ${negative} when the
|
||||||
|
// request didn't pass one.
|
||||||
|
rawCfg map[string]any
|
||||||
|
|
||||||
defaultSteps int
|
defaultSteps int
|
||||||
defaultSampler string
|
defaultSampler string
|
||||||
defaultScheduler string
|
defaultScheduler string
|
||||||
|
defaultCFG float64
|
||||||
|
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
pollInterval time.Duration
|
pollInterval time.Duration
|
||||||
@@ -49,12 +54,20 @@ type Comfy struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewComfy is the registry constructor. cfg is the adapter's slice of
|
// NewComfy is the registry constructor. cfg is the adapter's slice of
|
||||||
// imagen.yaml. Required keys: base_url, model. The rest have sensible FLUX
|
// imagen.yaml.
|
||||||
// schnell defaults.
|
//
|
||||||
|
// Required keys: base_url, model.
|
||||||
|
// Optional keys: workflow (defaults to "flux1-schnell" for back-compat with
|
||||||
|
// existing configs), default_steps, default_sampler, default_scheduler,
|
||||||
|
// default_cfg, plus any template-specific knobs (vae, clip, clip_l,
|
||||||
|
// clip_t5, dtype, shift, guidance, …) the chosen workflow references.
|
||||||
func NewComfy(name string, cfg map[string]any) (Backend, error) {
|
func NewComfy(name string, cfg map[string]any) (Backend, error) {
|
||||||
if name == "" {
|
if name == "" {
|
||||||
return nil, fmt.Errorf("comfyui: empty instance name")
|
return nil, fmt.Errorf("comfyui: empty instance name")
|
||||||
}
|
}
|
||||||
|
if cfg == nil {
|
||||||
|
cfg = map[string]any{}
|
||||||
|
}
|
||||||
base := strings.TrimRight(getString(cfg, "base_url", ""), "/")
|
base := strings.TrimRight(getString(cfg, "base_url", ""), "/")
|
||||||
if base == "" {
|
if base == "" {
|
||||||
return nil, fmt.Errorf("comfyui[%s]: base_url is required", name)
|
return nil, fmt.Errorf("comfyui[%s]: base_url is required", name)
|
||||||
@@ -67,23 +80,27 @@ func NewComfy(name string, cfg map[string]any) (Backend, error) {
|
|||||||
return nil, fmt.Errorf("comfyui[%s]: model is required", name)
|
return nil, fmt.Errorf("comfyui[%s]: model is required", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
workflow := getString(cfg, "workflow", "flux1-schnell")
|
||||||
|
// Fail fast on a bad workflow ref so users see the error at startup,
|
||||||
|
// not on first /prompt submission.
|
||||||
|
if _, err := LoadWorkflowTemplate(workflow); err != nil {
|
||||||
|
return nil, fmt.Errorf("comfyui[%s]: %w", name, err)
|
||||||
|
}
|
||||||
|
|
||||||
c := &Comfy{
|
c := &Comfy{
|
||||||
instance: name,
|
instance: name,
|
||||||
base: base,
|
base: base,
|
||||||
model: model,
|
workflow: workflow,
|
||||||
|
rawCfg: cfg,
|
||||||
vae: getString(cfg, "vae", "ae.safetensors"),
|
|
||||||
clipL: getString(cfg, "clip_l", "clip_l.safetensors"),
|
|
||||||
clipT5: getString(cfg, "clip_t5", "t5xxl_fp8_e4m3fn.safetensors"),
|
|
||||||
dtype: getString(cfg, "weight_dtype", "fp8_e4m3fn"),
|
|
||||||
|
|
||||||
defaultSteps: getInt(cfg, "default_steps", 4),
|
defaultSteps: getInt(cfg, "default_steps", 4),
|
||||||
defaultSampler: getString(cfg, "default_sampler", "euler"),
|
defaultSampler: getString(cfg, "default_sampler", "euler"),
|
||||||
defaultScheduler: getString(cfg, "default_scheduler", "simple"),
|
defaultScheduler: getString(cfg, "default_scheduler", "simple"),
|
||||||
|
defaultCFG: getFloat(cfg, "default_cfg", 1.0),
|
||||||
|
|
||||||
httpClient: &http.Client{Timeout: 60 * time.Second},
|
httpClient: &http.Client{Timeout: 60 * time.Second},
|
||||||
pollInterval: 250 * time.Millisecond,
|
pollInterval: 250 * time.Millisecond,
|
||||||
pollTimeout: 120 * time.Second,
|
pollTimeout: 300 * time.Second,
|
||||||
|
|
||||||
randSeed: cryptoSeed,
|
randSeed: cryptoSeed,
|
||||||
clientIDFn: randClientID,
|
clientIDFn: randClientID,
|
||||||
@@ -103,19 +120,26 @@ func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) {
|
|||||||
|
|
||||||
sampler := c.defaultSampler
|
sampler := c.defaultSampler
|
||||||
scheduler := c.defaultScheduler
|
scheduler := c.defaultScheduler
|
||||||
|
cfg := c.defaultCFG
|
||||||
if v, ok := req.BackendOpts["sampler"].(string); ok && v != "" {
|
if v, ok := req.BackendOpts["sampler"].(string); ok && v != "" {
|
||||||
sampler = v
|
sampler = v
|
||||||
}
|
}
|
||||||
if v, ok := req.BackendOpts["scheduler"].(string); ok && v != "" {
|
if v, ok := req.BackendOpts["scheduler"].(string); ok && v != "" {
|
||||||
scheduler = v
|
scheduler = v
|
||||||
}
|
}
|
||||||
|
if v, ok := req.BackendOpts["cfg"].(float64); ok && v > 0 {
|
||||||
|
cfg = v
|
||||||
|
}
|
||||||
|
|
||||||
seed := req.Seed
|
seed := req.Seed
|
||||||
if seed == 0 {
|
if seed == 0 {
|
||||||
seed = c.randSeed()
|
seed = c.randSeed()
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler)
|
workflow, err := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler, cfg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("comfyui[%s]: build workflow: %w", c.instance, err)
|
||||||
|
}
|
||||||
clientID := c.clientIDFn()
|
clientID := c.clientIDFn()
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
@@ -133,14 +157,17 @@ func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) {
|
|||||||
}
|
}
|
||||||
latencyMs := time.Since(start).Milliseconds()
|
latencyMs := time.Since(start).Milliseconds()
|
||||||
|
|
||||||
|
model := getString(c.rawCfg, "model", "")
|
||||||
meta := map[string]any{
|
meta := map[string]any{
|
||||||
"backend": c.instance,
|
"backend": c.instance,
|
||||||
"backend_type": ComfyType,
|
"backend_type": ComfyType,
|
||||||
"model": c.model,
|
"workflow": c.workflow,
|
||||||
|
"model": model,
|
||||||
"seed": seed,
|
"seed": seed,
|
||||||
"steps": steps,
|
"steps": steps,
|
||||||
"sampler": sampler,
|
"sampler": sampler,
|
||||||
"scheduler": scheduler,
|
"scheduler": scheduler,
|
||||||
|
"cfg": cfg,
|
||||||
"width": width,
|
"width": width,
|
||||||
"height": height,
|
"height": height,
|
||||||
"latency_ms": latencyMs,
|
"latency_ms": latencyMs,
|
||||||
@@ -173,6 +200,7 @@ func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clien
|
|||||||
return "", fmt.Errorf("comfyui: marshal workflow: %w", err)
|
return "", fmt.Errorf("comfyui: marshal workflow: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
model := getString(c.rawCfg, "model", "")
|
||||||
var lastErr error
|
var lastErr error
|
||||||
for attempt := range 2 {
|
for attempt := range 2 {
|
||||||
if attempt > 0 {
|
if attempt > 0 {
|
||||||
@@ -196,7 +224,7 @@ func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clien
|
|||||||
_ = resp.Body.Close()
|
_ = resp.Body.Close()
|
||||||
switch {
|
switch {
|
||||||
case resp.StatusCode >= 200 && resp.StatusCode < 300:
|
case resp.StatusCode >= 200 && resp.StatusCode < 300:
|
||||||
return parsePromptID(respBody, c.model)
|
return parsePromptID(respBody, model)
|
||||||
case resp.StatusCode >= 500:
|
case resp.StatusCode >= 500:
|
||||||
lastErr = fmt.Errorf("comfyui /prompt %d: %s", resp.StatusCode, snip(respBody))
|
lastErr = fmt.Errorf("comfyui /prompt %d: %s", resp.StatusCode, snip(respBody))
|
||||||
continue
|
continue
|
||||||
@@ -333,98 +361,74 @@ func (c *Comfy) connError(err error) error {
|
|||||||
// workflow-validation failures and put the diagnostics in node_errors; older
|
// workflow-validation failures and put the diagnostics in node_errors; older
|
||||||
// builds use 200 + node_errors. This handles the 4xx flavour.
|
// builds use 200 + node_errors. This handles the 4xx flavour.
|
||||||
func (c *Comfy) classifyBadRequest(status int, body []byte) error {
|
func (c *Comfy) classifyBadRequest(status int, body []byte) error {
|
||||||
if hint, ok := missingModelHint(body, c.model); ok {
|
model := getString(c.rawCfg, "model", "")
|
||||||
return fmt.Errorf("comfyui /prompt %d: %s — see docs/setup-comfyui-mrock.md", status, hint)
|
if hint, ok := missingModelHint(body, model); ok {
|
||||||
|
return fmt.Errorf("comfyui /prompt %d: %s — see docs/backends.md", status, hint)
|
||||||
}
|
}
|
||||||
return fmt.Errorf("comfyui /prompt %d: %s", status, snip(body))
|
return fmt.Errorf("comfyui /prompt %d: %s", status, snip(body))
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildWorkflow assembles the canonical FLUX.1 schnell ComfyUI workflow,
|
// buildWorkflow loads the configured workflow template and substitutes the
|
||||||
// node-IDs matching the upstream "flux-schnell" template so anyone debugging
|
// per-call placeholders (prompt, seed, sampler, …) plus any string/number
|
||||||
// in the ComfyUI UI sees a familiar shape.
|
// fields the user defined in the yaml block. Any placeholder in the template
|
||||||
func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string) map[string]any {
|
// that isn't in `subs` produces an error from SubstituteWorkflow.
|
||||||
return map[string]any{
|
func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string, cfg float64) (map[string]any, error) {
|
||||||
"6": map[string]any{
|
wf, err := LoadWorkflowTemplate(c.workflow)
|
||||||
"class_type": "CLIPTextEncode",
|
if err != nil {
|
||||||
"inputs": map[string]any{
|
return nil, err
|
||||||
"text": prompt,
|
}
|
||||||
"clip": []any{"11", 0},
|
subs := map[string]any{
|
||||||
},
|
"prompt": prompt,
|
||||||
},
|
"negative": negative,
|
||||||
"8": map[string]any{
|
|
||||||
"class_type": "VAEDecode",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"samples": []any{"31", 0},
|
|
||||||
"vae": []any{"10", 0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"9": map[string]any{
|
|
||||||
"class_type": "SaveImage",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"filename_prefix": "imagen",
|
|
||||||
"images": []any{"8", 0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"10": map[string]any{
|
|
||||||
"class_type": "VAELoader",
|
|
||||||
"inputs": map[string]any{"vae_name": c.vae},
|
|
||||||
},
|
|
||||||
"11": map[string]any{
|
|
||||||
"class_type": "DualCLIPLoader",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"clip_name1": c.clipT5,
|
|
||||||
"clip_name2": c.clipL,
|
|
||||||
"type": "flux",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"12": map[string]any{
|
|
||||||
"class_type": "UNETLoader",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"unet_name": c.model,
|
|
||||||
"weight_dtype": c.dtype,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"13": map[string]any{
|
|
||||||
"class_type": "CLIPTextEncode",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"text": negative,
|
|
||||||
"clip": []any{"11", 0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"27": map[string]any{
|
|
||||||
"class_type": "EmptySD3LatentImage",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"width": w,
|
"width": w,
|
||||||
"height": h,
|
"height": h,
|
||||||
"batch_size": 1,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"30": map[string]any{
|
|
||||||
"class_type": "ModelSamplingFlux",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"model": []any{"12", 0},
|
|
||||||
"max_shift": 1.15,
|
|
||||||
"base_shift": 0.5,
|
|
||||||
"width": w,
|
|
||||||
"height": h,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"31": map[string]any{
|
|
||||||
"class_type": "KSampler",
|
|
||||||
"inputs": map[string]any{
|
|
||||||
"model": []any{"30", 0},
|
|
||||||
"seed": seed,
|
"seed": seed,
|
||||||
"steps": steps,
|
"steps": steps,
|
||||||
"cfg": 1.0,
|
"sampler": sampler,
|
||||||
"sampler_name": sampler,
|
|
||||||
"scheduler": scheduler,
|
"scheduler": scheduler,
|
||||||
"denoise": 1.0,
|
"cfg": cfg,
|
||||||
"positive": []any{"6", 0},
|
|
||||||
"negative": []any{"13", 0},
|
|
||||||
"latent_image": []any{"27", 0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
// Surface every scalar field from the yaml block so per-template knobs
|
||||||
|
// (vae, clip, clip_l, clip_t5, dtype, shift, guidance, …) work without
|
||||||
|
// adapter-code changes. Framework keys are excluded.
|
||||||
|
for k, v := range c.rawCfg {
|
||||||
|
switch k {
|
||||||
|
case "type", "base_url", "workflow",
|
||||||
|
"default_steps", "default_sampler", "default_scheduler", "default_cfg":
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, alreadySet := subs[k]; alreadySet {
|
||||||
|
// A per-call var (e.g. ${prompt}) beats anything yaml put under
|
||||||
|
// the same key — yaml can't shadow request-derived values.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch v := v.(type) {
|
||||||
|
case string, int, int64, float64, bool:
|
||||||
|
subs[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Provide sensible defaults for common optional knobs so a workflow that
|
||||||
|
// references one of these doesn't fail substitution when the user
|
||||||
|
// didn't override it in yaml. Extra keys are ignored if the workflow
|
||||||
|
// doesn't reference them, so it's safe to always set the lot.
|
||||||
|
defaults := map[string]any{
|
||||||
|
"vae": "ae.safetensors",
|
||||||
|
"clip_l": "clip_l.safetensors",
|
||||||
|
"clip_t5": "t5xxl_fp8_e4m3fn.safetensors",
|
||||||
|
"clip": "qwen_3_4b.safetensors",
|
||||||
|
"dtype": "fp8_e4m3fn",
|
||||||
|
"guidance": 4.0,
|
||||||
|
"shift": 3.0,
|
||||||
|
}
|
||||||
|
for k, v := range defaults {
|
||||||
|
if _, ok := subs[k]; !ok {
|
||||||
|
subs[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, err := SubstituteWorkflow(wf, subs); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return wf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// parsePromptID handles the 2xx /prompt response. ComfyUI sometimes 200s a
|
// parsePromptID handles the 2xx /prompt response. ComfyUI sometimes 200s a
|
||||||
@@ -441,7 +445,7 @@ func parsePromptID(body []byte, model string) (string, error) {
|
|||||||
}
|
}
|
||||||
if len(resp.NodeErrors) > 0 || len(resp.Error) > 0 {
|
if len(resp.NodeErrors) > 0 || len(resp.Error) > 0 {
|
||||||
if hint, ok := missingModelHint(body, model); ok {
|
if hint, ok := missingModelHint(body, model); ok {
|
||||||
return "", fmt.Errorf("comfyui /prompt: %s — see docs/setup-comfyui-mrock.md", hint)
|
return "", fmt.Errorf("comfyui /prompt: %s — see docs/backends.md", hint)
|
||||||
}
|
}
|
||||||
return "", fmt.Errorf("comfyui /prompt rejected workflow: %s", snip(body))
|
return "", fmt.Errorf("comfyui /prompt rejected workflow: %s", snip(body))
|
||||||
}
|
}
|
||||||
@@ -489,15 +493,21 @@ func parseHistory(body []byte, promptID string) (string, bool, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// missingModelHint returns a user-actionable message when the response body
|
// missingModelHint returns a user-actionable message when the response body
|
||||||
// indicates the configured unet model isn't loaded on the server. ComfyUI
|
// indicates the configured unet/checkpoint model isn't loaded on the server.
|
||||||
// uses both the human-readable "Value not in list" message and the enum
|
// ComfyUI uses both the human-readable "Value not in list" message and the
|
||||||
// "value_not_in_list" type — match either.
|
// enum "value_not_in_list" type — match either.
|
||||||
func missingModelHint(body []byte, model string) (string, bool) {
|
func missingModelHint(body []byte, model string) (string, bool) {
|
||||||
s := string(body)
|
s := string(body)
|
||||||
hasMarker := strings.Contains(s, "Value not in list") || strings.Contains(s, "value_not_in_list")
|
hasMarker := strings.Contains(s, "Value not in list") || strings.Contains(s, "value_not_in_list")
|
||||||
if hasMarker && strings.Contains(s, "unet_name") {
|
if !hasMarker {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
if strings.Contains(s, "unet_name") {
|
||||||
return fmt.Sprintf("model %q not present in the ComfyUI server's models/unet/", model), true
|
return fmt.Sprintf("model %q not present in the ComfyUI server's models/unet/", model), true
|
||||||
}
|
}
|
||||||
|
if strings.Contains(s, "ckpt_name") {
|
||||||
|
return fmt.Sprintf("checkpoint %q not present in the ComfyUI server's models/checkpoints/", model), true
|
||||||
|
}
|
||||||
return "", false
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -536,6 +546,22 @@ func getInt(m map[string]any, k string, def int) int {
|
|||||||
return def
|
return def
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getFloat(m map[string]any, k string, def float64) float64 {
|
||||||
|
if v, ok := m[k]; ok {
|
||||||
|
switch n := v.(type) {
|
||||||
|
case float64:
|
||||||
|
return n
|
||||||
|
case float32:
|
||||||
|
return float64(n)
|
||||||
|
case int:
|
||||||
|
return float64(n)
|
||||||
|
case int64:
|
||||||
|
return float64(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return def
|
||||||
|
}
|
||||||
|
|
||||||
func orDefaultInt(v, def int) int {
|
func orDefaultInt(v, def int) int {
|
||||||
if v == 0 {
|
if v == 0 {
|
||||||
return def
|
return def
|
||||||
|
|||||||
@@ -312,7 +312,7 @@ func TestComfyMissingModelHintsAtSetupDoc(t *testing.T) {
|
|||||||
t.Fatal("expected error")
|
t.Fatal("expected error")
|
||||||
}
|
}
|
||||||
msg := err.Error()
|
msg := err.Error()
|
||||||
if !strings.Contains(msg, "docs/setup-comfyui-mrock.md") {
|
if !strings.Contains(msg, "docs/backends.md") {
|
||||||
t.Errorf("error should point at the setup doc, got %v", err)
|
t.Errorf("error should point at the setup doc, got %v", err)
|
||||||
}
|
}
|
||||||
if !strings.Contains(msg, "flux1-schnell.safetensors") {
|
if !strings.Contains(msg, "flux1-schnell.safetensors") {
|
||||||
@@ -331,7 +331,7 @@ func TestComfyMissingModelOn200WithNodeErrors(t *testing.T) {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("expected error for node_errors on 200")
|
t.Fatal("expected error for node_errors on 200")
|
||||||
}
|
}
|
||||||
if !strings.Contains(err.Error(), "docs/setup-comfyui-mrock.md") {
|
if !strings.Contains(err.Error(), "docs/backends.md") {
|
||||||
t.Errorf("error should point at the setup doc, got %v", err)
|
t.Errorf("error should point at the setup doc, got %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
156
internal/backend/workflow_template.go
Normal file
156
internal/backend/workflow_template.go
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"embed"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"maps"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// bundledWorkflows is the embedded filesystem holding every file matched by
// the workflows/*.json pattern below, compiled into the binary at build time.
//
//go:embed workflows/*.json
var bundledWorkflows embed.FS
|
||||||
|
|
||||||
|
// placeholderRE matches a string whose entire content is one placeholder such
// as "${prompt}"; leading and trailing whitespace around the token is allowed.
// Capture group 1 is the placeholder key: an ASCII letter followed by letters,
// digits or underscores.
//
// Requiring the placeholder to be the whole value lets a substitution keep
// its type (a numeric substitution becomes a JSON number, not a stringified
// one) instead of round-tripping through strings.Replace, which would force
// everything into a string.
var placeholderRE = regexp.MustCompile(`^\s*\$\{([a-zA-Z][a-zA-Z0-9_]*)\}\s*$`)
|
||||||
|
|
||||||
|
// LoadWorkflowTemplate returns the parsed JSON for a workflow template.
|
||||||
|
// `name` is resolved in this order:
|
||||||
|
//
|
||||||
|
// 1. exact filesystem path that exists on disk (absolute or relative);
|
||||||
|
// 2. one of the bundled templates under internal/backend/workflows/
|
||||||
|
// (with or without the .json suffix).
|
||||||
|
//
|
||||||
|
// The returned map is a fresh deep copy of the template; callers can mutate
|
||||||
|
// it freely.
|
||||||
|
func LoadWorkflowTemplate(name string) (map[string]any, error) {
|
||||||
|
if name == "" {
|
||||||
|
return nil, fmt.Errorf("workflow template name is empty")
|
||||||
|
}
|
||||||
|
raw, err := readWorkflowBytes(name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var wf map[string]any
|
||||||
|
if err := json.Unmarshal(raw, &wf); err != nil {
|
||||||
|
return nil, fmt.Errorf("workflow %s: parse: %w", name, err)
|
||||||
|
}
|
||||||
|
return wf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BundledWorkflowNames returns the names of templates compiled into the
|
||||||
|
// binary, sorted. Each name is the basename without the .json suffix.
|
||||||
|
func BundledWorkflowNames() []string {
|
||||||
|
entries, err := fs.ReadDir(bundledWorkflows, "workflows")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
out := make([]string, 0, len(entries))
|
||||||
|
for _, e := range entries {
|
||||||
|
n := e.Name()
|
||||||
|
if !strings.HasSuffix(n, ".json") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, strings.TrimSuffix(n, ".json"))
|
||||||
|
}
|
||||||
|
sort.Strings(out)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func readWorkflowBytes(name string) ([]byte, error) {
|
||||||
|
// Filesystem path wins if it points at a real file. Lets a user override
|
||||||
|
// a bundled template by passing an absolute path in yaml.
|
||||||
|
if strings.ContainsRune(name, os.PathSeparator) || strings.HasSuffix(name, ".json") {
|
||||||
|
if b, err := os.ReadFile(name); err == nil {
|
||||||
|
return b, nil
|
||||||
|
} else if !os.IsNotExist(err) {
|
||||||
|
return nil, fmt.Errorf("workflow %s: %w", name, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Bundled lookup. Try the literal name as a file inside workflows/, then
|
||||||
|
// with the .json suffix appended.
|
||||||
|
candidates := []string{
|
||||||
|
filepath.Join("workflows", name),
|
||||||
|
filepath.Join("workflows", name+".json"),
|
||||||
|
}
|
||||||
|
for _, c := range candidates {
|
||||||
|
if b, err := bundledWorkflows.ReadFile(c); err == nil {
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("workflow %q not found (bundled templates: %v)", name, BundledWorkflowNames())
|
||||||
|
}
|
||||||
|
|
||||||
|
// SubstituteWorkflow walks wf and replaces every "${key}" string with the
|
||||||
|
// matching value from subs, preserving JSON types. Returns the set of
|
||||||
|
// placeholder keys it actually touched, so the caller can detect missing
|
||||||
|
// substitutions even when a key is defined in subs but never referenced in
|
||||||
|
// the workflow (typical when a yaml block sets a knob a different template
|
||||||
|
// would consume).
|
||||||
|
//
|
||||||
|
// Unknown placeholders (referenced in the workflow but absent from subs)
|
||||||
|
// produce an error so we never submit a workflow with raw "${foo}" tokens.
|
||||||
|
func SubstituteWorkflow(wf map[string]any, subs map[string]any) (used map[string]struct{}, err error) {
|
||||||
|
used = make(map[string]struct{})
|
||||||
|
walked, err := substituteValue(wf, subs, used)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// substituteValue returns the replacement for the top-level value, which
|
||||||
|
// should still be the same map (just with mutated children).
|
||||||
|
if m, ok := walked.(map[string]any); ok {
|
||||||
|
// Copy back into wf so the caller's reference reflects the result.
|
||||||
|
for k := range wf {
|
||||||
|
delete(wf, k)
|
||||||
|
}
|
||||||
|
maps.Copy(wf, m)
|
||||||
|
}
|
||||||
|
return used, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func substituteValue(v any, subs map[string]any, used map[string]struct{}) (any, error) {
|
||||||
|
switch x := v.(type) {
|
||||||
|
case map[string]any:
|
||||||
|
out := make(map[string]any, len(x))
|
||||||
|
for k, child := range x {
|
||||||
|
replaced, err := substituteValue(child, subs, used)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out[k] = replaced
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
case []any:
|
||||||
|
out := make([]any, len(x))
|
||||||
|
for i, child := range x {
|
||||||
|
replaced, err := substituteValue(child, subs, used)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out[i] = replaced
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
case string:
|
||||||
|
if m := placeholderRE.FindStringSubmatch(x); m != nil {
|
||||||
|
key := m[1]
|
||||||
|
val, ok := subs[key]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("workflow placeholder ${%s} has no substitution", key)
|
||||||
|
}
|
||||||
|
used[key] = struct{}{}
|
||||||
|
return val, nil
|
||||||
|
}
|
||||||
|
return x, nil
|
||||||
|
default:
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
153
internal/backend/workflow_template_test.go
Normal file
153
internal/backend/workflow_template_test.go
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBundledWorkflowsParseable(t *testing.T) {
|
||||||
|
names := BundledWorkflowNames()
|
||||||
|
if len(names) == 0 {
|
||||||
|
t.Fatal("expected at least one bundled workflow")
|
||||||
|
}
|
||||||
|
mustHave := []string{"flux1-schnell", "flux2-klein", "sd35-medium"}
|
||||||
|
for _, want := range mustHave {
|
||||||
|
if !slices.Contains(names, want) {
|
||||||
|
t.Errorf("bundled workflows missing %q (have: %v)", want, names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Every bundled template must parse and contain at least one node.
|
||||||
|
for _, n := range names {
|
||||||
|
wf, err := LoadWorkflowTemplate(n)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("LoadWorkflowTemplate(%q): %v", n, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(wf) == 0 {
|
||||||
|
t.Errorf("workflow %q has zero nodes", n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadWorkflowFromFilesystem(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "custom.json")
|
||||||
|
body := `{"1":{"class_type":"X","inputs":{"v":"${prompt}"}}}`
|
||||||
|
if err := os.WriteFile(path, []byte(body), 0o644); err != nil {
|
||||||
|
t.Fatalf("write tmp workflow: %v", err)
|
||||||
|
}
|
||||||
|
wf, err := LoadWorkflowTemplate(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load from path: %v", err)
|
||||||
|
}
|
||||||
|
if _, ok := wf["1"]; !ok {
|
||||||
|
t.Errorf("custom workflow missing node 1")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadWorkflowUnknownNameErrors(t *testing.T) {
|
||||||
|
_, err := LoadWorkflowTemplate("definitely-not-a-real-workflow")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for unknown workflow name")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "not found") {
|
||||||
|
t.Errorf("error should say not found, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubstituteWorkflowPreservesTypes(t *testing.T) {
|
||||||
|
wf := map[string]any{
|
||||||
|
"31": map[string]any{
|
||||||
|
"class_type": "KSampler",
|
||||||
|
"inputs": map[string]any{
|
||||||
|
"seed": "${seed}",
|
||||||
|
"steps": "${steps}",
|
||||||
|
"text": "${prompt}",
|
||||||
|
"cfg": "${cfg}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
subs := map[string]any{
|
||||||
|
"seed": int64(42),
|
||||||
|
"steps": 11,
|
||||||
|
"prompt": "a cat",
|
||||||
|
"cfg": 4.5,
|
||||||
|
}
|
||||||
|
used, err := SubstituteWorkflow(wf, subs)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Substitute: %v", err)
|
||||||
|
}
|
||||||
|
if len(used) != 4 {
|
||||||
|
t.Errorf("used = %v, want all four", used)
|
||||||
|
}
|
||||||
|
inputs := wf["31"].(map[string]any)["inputs"].(map[string]any)
|
||||||
|
if seed, ok := inputs["seed"].(int64); !ok || seed != 42 {
|
||||||
|
t.Errorf("seed = %T %v, want int64 42", inputs["seed"], inputs["seed"])
|
||||||
|
}
|
||||||
|
if steps, ok := inputs["steps"].(int); !ok || steps != 11 {
|
||||||
|
t.Errorf("steps = %T %v, want int 11", inputs["steps"], inputs["steps"])
|
||||||
|
}
|
||||||
|
if text, ok := inputs["text"].(string); !ok || text != "a cat" {
|
||||||
|
t.Errorf("text = %T %v, want string", inputs["text"], inputs["text"])
|
||||||
|
}
|
||||||
|
if cfg, ok := inputs["cfg"].(float64); !ok || cfg != 4.5 {
|
||||||
|
t.Errorf("cfg = %T %v, want float64 4.5", inputs["cfg"], inputs["cfg"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubstituteWorkflowMissingPlaceholderErrors(t *testing.T) {
|
||||||
|
wf := map[string]any{
|
||||||
|
"1": map[string]any{"inputs": map[string]any{"v": "${missing}"}},
|
||||||
|
}
|
||||||
|
_, err := SubstituteWorkflow(wf, map[string]any{})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for missing placeholder")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "${missing}") {
|
||||||
|
t.Errorf("error should name the placeholder, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubstituteWorkflowOnlyWholeTokens(t *testing.T) {
|
||||||
|
// Partial-match strings ("prefix ${prompt} suffix") are NOT substituted —
|
||||||
|
// the placeholder must be the whole value so we can preserve types.
|
||||||
|
wf := map[string]any{
|
||||||
|
"1": map[string]any{"inputs": map[string]any{
|
||||||
|
"keep_string": "stuff with ${prompt} inside",
|
||||||
|
"replace_full": "${prompt}",
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
used, err := SubstituteWorkflow(wf, map[string]any{"prompt": "x"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Substitute: %v", err)
|
||||||
|
}
|
||||||
|
inputs := wf["1"].(map[string]any)["inputs"].(map[string]any)
|
||||||
|
if inputs["keep_string"].(string) != "stuff with ${prompt} inside" {
|
||||||
|
t.Errorf("partial match should be left alone, got %q", inputs["keep_string"])
|
||||||
|
}
|
||||||
|
if inputs["replace_full"].(string) != "x" {
|
||||||
|
t.Errorf("full-value match should substitute, got %q", inputs["replace_full"])
|
||||||
|
}
|
||||||
|
if _, ok := used["prompt"]; !ok {
|
||||||
|
t.Errorf("used should track keys that fired")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlux1SchnellTemplateMatchesLegacyShape(t *testing.T) {
|
||||||
|
// Regression guard against the historical hardcoded workflow: every
|
||||||
|
// node ID the old Comfy.buildWorkflow used must still exist in the
|
||||||
|
// migrated template.
|
||||||
|
wf, err := LoadWorkflowTemplate("flux1-schnell")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load flux1-schnell: %v", err)
|
||||||
|
}
|
||||||
|
legacyNodes := []string{"6", "8", "9", "10", "11", "12", "13", "27", "30", "31"}
|
||||||
|
for _, id := range legacyNodes {
|
||||||
|
if _, ok := wf[id]; !ok {
|
||||||
|
t.Errorf("flux1-schnell template missing node %q (legacy parity)", id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
84
internal/backend/workflows/flux1-schnell.json
Normal file
84
internal/backend/workflows/flux1-schnell.json
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
{
|
||||||
|
"6": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": "${prompt}",
|
||||||
|
"clip": ["11", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"class_type": "VAEDecode",
|
||||||
|
"inputs": {
|
||||||
|
"samples": ["31", 0],
|
||||||
|
"vae": ["10", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"9": {
|
||||||
|
"class_type": "SaveImage",
|
||||||
|
"inputs": {
|
||||||
|
"filename_prefix": "imagen",
|
||||||
|
"images": ["8", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"10": {
|
||||||
|
"class_type": "VAELoader",
|
||||||
|
"inputs": {
|
||||||
|
"vae_name": "${vae}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"11": {
|
||||||
|
"class_type": "DualCLIPLoader",
|
||||||
|
"inputs": {
|
||||||
|
"clip_name1": "${clip_t5}",
|
||||||
|
"clip_name2": "${clip_l}",
|
||||||
|
"type": "flux"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"12": {
|
||||||
|
"class_type": "UNETLoader",
|
||||||
|
"inputs": {
|
||||||
|
"unet_name": "${model}",
|
||||||
|
"weight_dtype": "${dtype}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"13": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": "${negative}",
|
||||||
|
"clip": ["11", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"27": {
|
||||||
|
"class_type": "EmptySD3LatentImage",
|
||||||
|
"inputs": {
|
||||||
|
"width": "${width}",
|
||||||
|
"height": "${height}",
|
||||||
|
"batch_size": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"30": {
|
||||||
|
"class_type": "ModelSamplingFlux",
|
||||||
|
"inputs": {
|
||||||
|
"model": ["12", 0],
|
||||||
|
"max_shift": 1.15,
|
||||||
|
"base_shift": 0.5,
|
||||||
|
"width": "${width}",
|
||||||
|
"height": "${height}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"31": {
|
||||||
|
"class_type": "KSampler",
|
||||||
|
"inputs": {
|
||||||
|
"model": ["30", 0],
|
||||||
|
"seed": "${seed}",
|
||||||
|
"steps": "${steps}",
|
||||||
|
"cfg": "${cfg}",
|
||||||
|
"sampler_name": "${sampler}",
|
||||||
|
"scheduler": "${scheduler}",
|
||||||
|
"denoise": 1.0,
|
||||||
|
"positive": ["6", 0],
|
||||||
|
"negative": ["13", 0],
|
||||||
|
"latent_image": ["27", 0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
79
internal/backend/workflows/flux2-klein.json
Normal file
79
internal/backend/workflows/flux2-klein.json
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
{
|
||||||
|
"6": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": "${prompt}",
|
||||||
|
"clip": ["11", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"class_type": "VAEDecode",
|
||||||
|
"inputs": {
|
||||||
|
"samples": ["31", 0],
|
||||||
|
"vae": ["10", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"9": {
|
||||||
|
"class_type": "SaveImage",
|
||||||
|
"inputs": {
|
||||||
|
"filename_prefix": "imagen",
|
||||||
|
"images": ["8", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"10": {
|
||||||
|
"class_type": "VAELoader",
|
||||||
|
"inputs": {
|
||||||
|
"vae_name": "${vae}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"11": {
|
||||||
|
"class_type": "CLIPLoader",
|
||||||
|
"inputs": {
|
||||||
|
"clip_name": "${clip}",
|
||||||
|
"type": "flux2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"12": {
|
||||||
|
"class_type": "UNETLoader",
|
||||||
|
"inputs": {
|
||||||
|
"unet_name": "${model}",
|
||||||
|
"weight_dtype": "${dtype}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"14": {
|
||||||
|
"class_type": "FluxGuidance",
|
||||||
|
"inputs": {
|
||||||
|
"conditioning": ["6", 0],
|
||||||
|
"guidance": "${guidance}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"15": {
|
||||||
|
"class_type": "ConditioningZeroOut",
|
||||||
|
"inputs": {
|
||||||
|
"conditioning": ["6", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"27": {
|
||||||
|
"class_type": "EmptyFlux2LatentImage",
|
||||||
|
"inputs": {
|
||||||
|
"width": "${width}",
|
||||||
|
"height": "${height}",
|
||||||
|
"batch_size": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"31": {
|
||||||
|
"class_type": "KSampler",
|
||||||
|
"inputs": {
|
||||||
|
"model": ["12", 0],
|
||||||
|
"seed": "${seed}",
|
||||||
|
"steps": "${steps}",
|
||||||
|
"cfg": "${cfg}",
|
||||||
|
"sampler_name": "${sampler}",
|
||||||
|
"scheduler": "${scheduler}",
|
||||||
|
"denoise": 1.0,
|
||||||
|
"positive": ["14", 0],
|
||||||
|
"negative": ["15", 0],
|
||||||
|
"latent_image": ["27", 0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
66
internal/backend/workflows/sd35-medium.json
Normal file
66
internal/backend/workflows/sd35-medium.json
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
{
|
||||||
|
"4": {
|
||||||
|
"class_type": "CheckpointLoaderSimple",
|
||||||
|
"inputs": {
|
||||||
|
"ckpt_name": "${model}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"6": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": "${prompt}",
|
||||||
|
"clip": ["4", 1]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"7": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": "${negative}",
|
||||||
|
"clip": ["4", 1]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"class_type": "VAEDecode",
|
||||||
|
"inputs": {
|
||||||
|
"samples": ["31", 0],
|
||||||
|
"vae": ["4", 2]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"9": {
|
||||||
|
"class_type": "SaveImage",
|
||||||
|
"inputs": {
|
||||||
|
"filename_prefix": "imagen",
|
||||||
|
"images": ["8", 0]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"13": {
|
||||||
|
"class_type": "ModelSamplingSD3",
|
||||||
|
"inputs": {
|
||||||
|
"model": ["4", 0],
|
||||||
|
"shift": "${shift}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"27": {
|
||||||
|
"class_type": "EmptySD3LatentImage",
|
||||||
|
"inputs": {
|
||||||
|
"width": "${width}",
|
||||||
|
"height": "${height}",
|
||||||
|
"batch_size": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"31": {
|
||||||
|
"class_type": "KSampler",
|
||||||
|
"inputs": {
|
||||||
|
"model": ["13", 0],
|
||||||
|
"seed": "${seed}",
|
||||||
|
"steps": "${steps}",
|
||||||
|
"cfg": "${cfg}",
|
||||||
|
"sampler_name": "${sampler}",
|
||||||
|
"scheduler": "${scheduler}",
|
||||||
|
"denoise": 1.0,
|
||||||
|
"positive": ["6", 0],
|
||||||
|
"negative": ["7", 0],
|
||||||
|
"latent_image": ["27", 0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -144,15 +144,54 @@ output:
|
|||||||
cloud_sync: auto
|
cloud_sync: auto
|
||||||
|
|
||||||
backends:
|
backends:
|
||||||
|
# FLUX.1-schnell on the local ComfyUI server. The "workflow" key picks the
|
||||||
|
# bundled template under internal/backend/workflows/; omit it for back-compat
|
||||||
|
# (defaults to flux1-schnell). See docs/backends.md for the per-model setup.
|
||||||
flux-schnell-local:
|
flux-schnell-local:
|
||||||
type: comfyui
|
type: comfyui
|
||||||
base_url: http://mrock:8188
|
base_url: http://mrock:8188
|
||||||
|
workflow: flux1-schnell
|
||||||
# Filename of the unet checkpoint inside the ComfyUI server's
|
# Filename of the unet checkpoint inside the ComfyUI server's
|
||||||
# models/unet/ directory. See docs/setup-comfyui-mrock.md.
|
# models/unet/ directory.
|
||||||
model: flux1-schnell.safetensors
|
model: flux1-schnell.safetensors
|
||||||
|
vae: ae.safetensors
|
||||||
|
clip_l: clip_l.safetensors
|
||||||
|
clip_t5: t5xxl_fp8_e4m3fn.safetensors
|
||||||
|
dtype: fp8_e4m3fn
|
||||||
default_steps: 4
|
default_steps: 4
|
||||||
default_sampler: euler
|
default_sampler: euler
|
||||||
default_scheduler: simple
|
default_scheduler: simple
|
||||||
|
default_cfg: 1.0
|
||||||
|
|
||||||
|
# FLUX.2 [klein] 4B distilled — sub-second on RTX 4070 Ti SUPER.
|
||||||
|
# Weights: BFL non-commercial; flux-2-klein-base-4b-fp8 in models/unet/,
|
||||||
|
# qwen_3_4b in models/text_encoders/, flux2-vae in models/vae/.
|
||||||
|
flux2-klein-local:
|
||||||
|
type: comfyui
|
||||||
|
base_url: http://mrock:8188
|
||||||
|
workflow: flux2-klein
|
||||||
|
model: flux-2-klein-base-4b-fp8.safetensors
|
||||||
|
vae: flux2-vae.safetensors
|
||||||
|
clip: qwen_3_4b.safetensors
|
||||||
|
dtype: fp8_e4m3fn
|
||||||
|
default_steps: 4
|
||||||
|
default_sampler: euler
|
||||||
|
default_scheduler: simple
|
||||||
|
default_cfg: 1.0
|
||||||
|
guidance: 4.0
|
||||||
|
|
||||||
|
# SD3.5 medium — single-checkpoint variant that bundles the three text
|
||||||
|
# encoders inside the .safetensors. Drop into models/checkpoints/.
|
||||||
|
sd35-medium-local:
|
||||||
|
type: comfyui
|
||||||
|
base_url: http://mrock:8188
|
||||||
|
workflow: sd35-medium
|
||||||
|
model: sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors
|
||||||
|
default_steps: 28
|
||||||
|
default_sampler: dpmpp_2m
|
||||||
|
default_scheduler: sgm_uniform
|
||||||
|
default_cfg: 4.5
|
||||||
|
shift: 3.0
|
||||||
|
|
||||||
mock:
|
mock:
|
||||||
type: mock
|
type: mock
|
||||||
|
|||||||
Reference in New Issue
Block a user