mAi: #10 - multi-model backend expansion (workflow templates + compare harness)

Path 1 architecture: one comfyui adapter, workflows as data.

- workflow_template.go: embed.FS + token substitution with type-preserving
  whole-value placeholders. ${prompt} → string, ${seed} → int64,
  ${cfg} → float64 — no JSON round-tripping. Partial matches ignored.
- comfyui.go: refactored to load workflow from embedded FS or filesystem
  path. Back-compat preserved: workflow: defaults to flux1-schnell.
- workflows/{flux1-schnell,flux2-klein,sd35-medium}.json — bundled
  templates. flux1-schnell migrated from hardcoded with identical node IDs.
- compare.go: new `imagen compare` subcommand. Sequential N-backend run
  (one GPU on mRock — parallel would OOM), per-backend PNG, sidecar JSON
  with per-model metadata + errors, composite contact sheet via Go image
  package (no ImageMagick dep).
- Sample config gains flux2-klein-local + sd35-medium-local instances.
- docs/backends.md: architecture rationale + per-model HF download paths
  + how to add a new bundled workflow + compare-harness reference.

Live smoke verified: compare mock + flux-schnell-local at 768×768 →
both PNGs written, sidecar JSON has workflow="flux1-schnell" + full
metadata, contact sheet renders. Worker contract (Request → Generate)
unchanged, so flexsiebels /imagine UI API surface preserved.

Tests: 11 existing comfyui + 6 new workflow_template + 5 new compare
tests, all green.

Adding a new model is now yaml + JSON, never Go.
This commit is contained in:
mAi
2026-05-11 17:29:57 +02:00
parent 623dd290c5
commit 8435817ce1
15 changed files with 1638 additions and 122 deletions

View File

@@ -20,24 +20,29 @@ import (
const ComfyType = "comfyui"
// Comfy is the ComfyUI adapter. It speaks the public `/prompt` + `/history`
// + `/view` HTTP API and submits a fixed FLUX.1 schnell workflow built from
// the values in Request.
// + `/view` HTTP API and submits a workflow built by substituting Request
// values into a JSON template (bundled under internal/backend/workflows/ or
// loaded from a filesystem path).
//
// Concurrency: a single Comfy is safe to share across goroutines as long as
// the underlying http.Client is. Generate does not hold long-lived state.
type Comfy struct {
instance string
base string
model string
vae string
clipL string
clipT5 string
dtype string
base string
workflow string
// rawCfg keeps the original yaml block (minus framework keys) so we can
// expose every user-defined string/number as a workflow substitution
// without enumerating each per-model knob in Go. Empty values still get
// a substitution entry so a template can reference ${negative} when the
// request didn't pass one.
rawCfg map[string]any
defaultSteps int
defaultSampler string
defaultScheduler string
defaultCFG float64
httpClient *http.Client
pollInterval time.Duration
@@ -49,12 +54,20 @@ type Comfy struct {
}
// NewComfy is the registry constructor. cfg is the adapter's slice of
// imagen.yaml. Required keys: base_url, model. The rest have sensible FLUX
// schnell defaults.
// imagen.yaml.
//
// Required keys: base_url, model.
// Optional keys: workflow (defaults to "flux1-schnell" for back-compat with
// existing configs), default_steps, default_sampler, default_scheduler,
// default_cfg, plus any template-specific knobs (vae, clip, clip_l,
// clip_t5, dtype, shift, guidance, …) the chosen workflow references.
func NewComfy(name string, cfg map[string]any) (Backend, error) {
if name == "" {
return nil, fmt.Errorf("comfyui: empty instance name")
}
if cfg == nil {
cfg = map[string]any{}
}
base := strings.TrimRight(getString(cfg, "base_url", ""), "/")
if base == "" {
return nil, fmt.Errorf("comfyui[%s]: base_url is required", name)
@@ -67,23 +80,27 @@ func NewComfy(name string, cfg map[string]any) (Backend, error) {
return nil, fmt.Errorf("comfyui[%s]: model is required", name)
}
workflow := getString(cfg, "workflow", "flux1-schnell")
// Fail fast on a bad workflow ref so users see the error at startup,
// not on first /prompt submission.
if _, err := LoadWorkflowTemplate(workflow); err != nil {
return nil, fmt.Errorf("comfyui[%s]: %w", name, err)
}
c := &Comfy{
instance: name,
base: base,
model: model,
vae: getString(cfg, "vae", "ae.safetensors"),
clipL: getString(cfg, "clip_l", "clip_l.safetensors"),
clipT5: getString(cfg, "clip_t5", "t5xxl_fp8_e4m3fn.safetensors"),
dtype: getString(cfg, "weight_dtype", "fp8_e4m3fn"),
workflow: workflow,
rawCfg: cfg,
defaultSteps: getInt(cfg, "default_steps", 4),
defaultSampler: getString(cfg, "default_sampler", "euler"),
defaultScheduler: getString(cfg, "default_scheduler", "simple"),
defaultCFG: getFloat(cfg, "default_cfg", 1.0),
httpClient: &http.Client{Timeout: 60 * time.Second},
pollInterval: 250 * time.Millisecond,
pollTimeout: 120 * time.Second,
pollTimeout: 300 * time.Second,
randSeed: cryptoSeed,
clientIDFn: randClientID,
@@ -103,19 +120,26 @@ func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) {
sampler := c.defaultSampler
scheduler := c.defaultScheduler
cfg := c.defaultCFG
if v, ok := req.BackendOpts["sampler"].(string); ok && v != "" {
sampler = v
}
if v, ok := req.BackendOpts["scheduler"].(string); ok && v != "" {
scheduler = v
}
if v, ok := req.BackendOpts["cfg"].(float64); ok && v > 0 {
cfg = v
}
seed := req.Seed
if seed == 0 {
seed = c.randSeed()
}
workflow := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler)
workflow, err := c.buildWorkflow(req.Prompt, req.NegativePrompt, width, height, seed, steps, sampler, scheduler, cfg)
if err != nil {
return nil, fmt.Errorf("comfyui[%s]: build workflow: %w", c.instance, err)
}
clientID := c.clientIDFn()
start := time.Now()
@@ -133,14 +157,17 @@ func (c *Comfy) Generate(ctx context.Context, req Request) (*Result, error) {
}
latencyMs := time.Since(start).Milliseconds()
model := getString(c.rawCfg, "model", "")
meta := map[string]any{
"backend": c.instance,
"backend_type": ComfyType,
"model": c.model,
"workflow": c.workflow,
"model": model,
"seed": seed,
"steps": steps,
"sampler": sampler,
"scheduler": scheduler,
"cfg": cfg,
"width": width,
"height": height,
"latency_ms": latencyMs,
@@ -173,6 +200,7 @@ func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clien
return "", fmt.Errorf("comfyui: marshal workflow: %w", err)
}
model := getString(c.rawCfg, "model", "")
var lastErr error
for attempt := range 2 {
if attempt > 0 {
@@ -196,7 +224,7 @@ func (c *Comfy) submitPrompt(ctx context.Context, workflow map[string]any, clien
_ = resp.Body.Close()
switch {
case resp.StatusCode >= 200 && resp.StatusCode < 300:
return parsePromptID(respBody, c.model)
return parsePromptID(respBody, model)
case resp.StatusCode >= 500:
lastErr = fmt.Errorf("comfyui /prompt %d: %s", resp.StatusCode, snip(respBody))
continue
@@ -333,98 +361,74 @@ func (c *Comfy) connError(err error) error {
// workflow-validation failures and put the diagnostics in node_errors; older
// builds use 200 + node_errors. This handles the 4xx flavour.
func (c *Comfy) classifyBadRequest(status int, body []byte) error {
if hint, ok := missingModelHint(body, c.model); ok {
return fmt.Errorf("comfyui /prompt %d: %s — see docs/setup-comfyui-mrock.md", status, hint)
model := getString(c.rawCfg, "model", "")
if hint, ok := missingModelHint(body, model); ok {
return fmt.Errorf("comfyui /prompt %d: %s — see docs/backends.md", status, hint)
}
return fmt.Errorf("comfyui /prompt %d: %s", status, snip(body))
}
// buildWorkflow assembles the canonical FLUX.1 schnell ComfyUI workflow,
// node-IDs matching the upstream "flux-schnell" template so anyone debugging
// in the ComfyUI UI sees a familiar shape.
func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string) map[string]any {
return map[string]any{
"6": map[string]any{
"class_type": "CLIPTextEncode",
"inputs": map[string]any{
"text": prompt,
"clip": []any{"11", 0},
},
},
"8": map[string]any{
"class_type": "VAEDecode",
"inputs": map[string]any{
"samples": []any{"31", 0},
"vae": []any{"10", 0},
},
},
"9": map[string]any{
"class_type": "SaveImage",
"inputs": map[string]any{
"filename_prefix": "imagen",
"images": []any{"8", 0},
},
},
"10": map[string]any{
"class_type": "VAELoader",
"inputs": map[string]any{"vae_name": c.vae},
},
"11": map[string]any{
"class_type": "DualCLIPLoader",
"inputs": map[string]any{
"clip_name1": c.clipT5,
"clip_name2": c.clipL,
"type": "flux",
},
},
"12": map[string]any{
"class_type": "UNETLoader",
"inputs": map[string]any{
"unet_name": c.model,
"weight_dtype": c.dtype,
},
},
"13": map[string]any{
"class_type": "CLIPTextEncode",
"inputs": map[string]any{
"text": negative,
"clip": []any{"11", 0},
},
},
"27": map[string]any{
"class_type": "EmptySD3LatentImage",
"inputs": map[string]any{
"width": w,
"height": h,
"batch_size": 1,
},
},
"30": map[string]any{
"class_type": "ModelSamplingFlux",
"inputs": map[string]any{
"model": []any{"12", 0},
"max_shift": 1.15,
"base_shift": 0.5,
"width": w,
"height": h,
},
},
"31": map[string]any{
"class_type": "KSampler",
"inputs": map[string]any{
"model": []any{"30", 0},
"seed": seed,
"steps": steps,
"cfg": 1.0,
"sampler_name": sampler,
"scheduler": scheduler,
"denoise": 1.0,
"positive": []any{"6", 0},
"negative": []any{"13", 0},
"latent_image": []any{"27", 0},
},
},
// buildWorkflow loads the configured workflow template and substitutes the
// per-call placeholders (prompt, seed, sampler, …) plus any string/number
// fields the user defined in the yaml block. The set of placeholder keys
// that aren't in `subs` produces an error from SubstituteWorkflow.
func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, steps int, sampler, scheduler string, cfg float64) (map[string]any, error) {
wf, err := LoadWorkflowTemplate(c.workflow)
if err != nil {
return nil, err
}
subs := map[string]any{
"prompt": prompt,
"negative": negative,
"width": w,
"height": h,
"seed": seed,
"steps": steps,
"sampler": sampler,
"scheduler": scheduler,
"cfg": cfg,
}
// Surface every scalar field from the yaml block so per-template knobs
// (vae, clip, clip_l, clip_t5, dtype, shift, guidance, …) work without
// adapter-code changes. Framework keys are excluded.
for k, v := range c.rawCfg {
switch k {
case "type", "base_url", "workflow",
"default_steps", "default_sampler", "default_scheduler", "default_cfg":
continue
}
if _, alreadySet := subs[k]; alreadySet {
// A per-call var (e.g. ${prompt}) beats anything yaml put under
// the same key — yaml can't shadow request-derived values.
continue
}
switch v := v.(type) {
case string, int, int64, float64, bool:
subs[k] = v
}
}
// Provide sensible defaults for common optional knobs so a workflow that
// references one of these doesn't fail substitution when the user
// didn't override it in yaml. Extra keys are ignored if the workflow
// doesn't reference them, so it's safe to always set the lot.
defaults := map[string]any{
"vae": "ae.safetensors",
"clip_l": "clip_l.safetensors",
"clip_t5": "t5xxl_fp8_e4m3fn.safetensors",
"clip": "qwen_3_4b.safetensors",
"dtype": "fp8_e4m3fn",
"guidance": 4.0,
"shift": 3.0,
}
for k, v := range defaults {
if _, ok := subs[k]; !ok {
subs[k] = v
}
}
if _, err := SubstituteWorkflow(wf, subs); err != nil {
return nil, err
}
return wf, nil
}
// parsePromptID handles the 2xx /prompt response. ComfyUI sometimes 200s a
@@ -432,8 +436,8 @@ func (c *Comfy) buildWorkflow(prompt, negative string, w, h int, seed int64, ste
// turns that into the same user-facing error as a 4xx with the same body.
func parsePromptID(body []byte, model string) (string, error) {
var resp struct {
PromptID string `json:"prompt_id"`
NodeErrors map[string]any `json:"node_errors"`
PromptID string `json:"prompt_id"`
NodeErrors map[string]any `json:"node_errors"`
Error json.RawMessage `json:"error"`
}
if err := json.Unmarshal(body, &resp); err != nil {
@@ -441,7 +445,7 @@ func parsePromptID(body []byte, model string) (string, error) {
}
if len(resp.NodeErrors) > 0 || len(resp.Error) > 0 {
if hint, ok := missingModelHint(body, model); ok {
return "", fmt.Errorf("comfyui /prompt: %s — see docs/setup-comfyui-mrock.md", hint)
return "", fmt.Errorf("comfyui /prompt: %s — see docs/backends.md", hint)
}
return "", fmt.Errorf("comfyui /prompt rejected workflow: %s", snip(body))
}
@@ -489,15 +493,21 @@ func parseHistory(body []byte, promptID string) (string, bool, error) {
}
// missingModelHint returns a user-actionable message when the response body
// indicates the configured unet model isn't loaded on the server. ComfyUI
// uses both the human-readable "Value not in list" message and the enum
// "value_not_in_list" type — match either.
// indicates the configured unet/checkpoint model isn't loaded on the server.
// ComfyUI uses both the human-readable "Value not in list" message and the
// enum "value_not_in_list" type — match either.
func missingModelHint(body []byte, model string) (string, bool) {
s := string(body)
hasMarker := strings.Contains(s, "Value not in list") || strings.Contains(s, "value_not_in_list")
if hasMarker && strings.Contains(s, "unet_name") {
if !hasMarker {
return "", false
}
if strings.Contains(s, "unet_name") {
return fmt.Sprintf("model %q not present in the ComfyUI server's models/unet/", model), true
}
if strings.Contains(s, "ckpt_name") {
return fmt.Sprintf("checkpoint %q not present in the ComfyUI server's models/checkpoints/", model), true
}
return "", false
}
@@ -536,6 +546,22 @@ func getInt(m map[string]any, k string, def int) int {
return def
}
func getFloat(m map[string]any, k string, def float64) float64 {
if v, ok := m[k]; ok {
switch n := v.(type) {
case float64:
return n
case float32:
return float64(n)
case int:
return float64(n)
case int64:
return float64(n)
}
}
return def
}
func orDefaultInt(v, def int) int {
if v == 0 {
return def