From 237270b2048e0f9acbc368638dd5f25980c81736 Mon Sep 17 00:00:00 2001 From: mAi Date: Fri, 8 May 2026 14:37:05 +0200 Subject: [PATCH] mAi: #211 - bootstrap ImaGen framework skeleton MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of the model-agnostic image-generation framework. Lands the plumbing other components (skill, ComfyUI/Replicate adapters, agents) will plug into: - internal/backend: Backend interface (Request/Result), thread-safe Registry with init-time Register, plus a Mock reference adapter that emits a deterministic gradient PNG for smoke tests. - internal/config: YAML loader for ~/.config/imagen.yaml. Framework owns default_backend + output settings + a per-backend block; each adapter owns the schema below its own block via BackendSpec.Raw. - internal/output: filename templating ({date}/{time}/{slug}/{seed}/ {backend}/{ext}), JSON metadata sidecar, --output override path. - internal/prompt: embedded styles.yaml, style-preset suffix application. - internal/server: 501 stub — HTTP surface lands in a follow-up issue. - cmd/imagen: generate / backends / config (init|validate|path) / serve / version subcommands. Stdlib-only flag parsing with a small helper to honour positional prompt args ahead of flags (matches the issue spec). - Tests for output (slug, naming template, sidecar), backend (mock PNG validity + determinism, registry build + duplicate panic), config (round-trip + validation), prompt (style apply + unknown-style error). - CLAUDE.md, README.md, docs/architecture.md, docs/usage.md, Makefile. Acceptance criteria from #211: 1. go build ./... — clean 2. imagen backends — lists registered backends, exits 0 3. imagen generate "test prompt" --backend mock --output /tmp/x.png — writes a 1024x1024 PNG plus an x.png.json sidecar 4. imagen config init | imagen config validate — round-trips cleanly 5. 
CLAUDE.md "Adding a new adapter" — six-step recipe --- .gitignore | 9 ++ CLAUDE.md | 113 +++++++++++++++++ Makefile | 23 ++++ README.md | 49 +++++++- cmd/imagen/backends.go | 51 ++++++++ cmd/imagen/config.go | 57 +++++++++ cmd/imagen/generate.go | 210 +++++++++++++++++++++++++++++++ cmd/imagen/main.go | 77 ++++++++++++ cmd/imagen/serve.go | 21 ++++ docs/architecture.md | 110 ++++++++++++++++ docs/usage.md | 73 +++++++++++ go.mod | 5 + go.sum | 4 + internal/backend/backend.go | 37 ++++++ internal/backend/backend_test.go | 93 ++++++++++++++ internal/backend/mock.go | 116 +++++++++++++++++ internal/backend/registry.go | 75 +++++++++++ internal/config/config.go | 143 +++++++++++++++++++++ internal/config/config_test.go | 70 +++++++++++ internal/output/output.go | 184 +++++++++++++++++++++++++++ internal/output/output_test.go | 127 +++++++++++++++++++ internal/prompt/prompt.go | 63 ++++++++++ internal/prompt/prompt_test.go | 50 ++++++++ internal/prompt/styles.yaml | 6 + internal/server/server.go | 31 +++++ 25 files changed, 1796 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Makefile create mode 100644 cmd/imagen/backends.go create mode 100644 cmd/imagen/config.go create mode 100644 cmd/imagen/generate.go create mode 100644 cmd/imagen/main.go create mode 100644 cmd/imagen/serve.go create mode 100644 docs/architecture.md create mode 100644 docs/usage.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/backend/backend.go create mode 100644 internal/backend/backend_test.go create mode 100644 internal/backend/mock.go create mode 100644 internal/backend/registry.go create mode 100644 internal/config/config.go create mode 100644 internal/config/config_test.go create mode 100644 internal/output/output.go create mode 100644 internal/output/output_test.go create mode 100644 internal/prompt/prompt.go create mode 100644 internal/prompt/prompt_test.go create mode 100644 
internal/prompt/styles.yaml create mode 100644 internal/server/server.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eac5259 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/bin/ +/dist/ +*.test +*.out +.DS_Store +.env +.env.local +/imagen +/coverage.txt diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..5e40263 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,113 @@ +# ImaGen — Project Instructions + +ImaGen is a model-agnostic image-generation framework. It has a single +opinionated CLI (`imagen`) that dispatches to whichever backend the user +configured — local FLUX on mRock via ComfyUI today, Replicate or DALL-E +tomorrow, something else next year. The framework owns plumbing (config, +output, naming, sidecars, prompt enrichment); each adapter owns the schema +and lifecycle of its own block in `~/.config/imagen.yaml`. + +## Architecture + +``` +cmd/imagen/ CLI shell — generate, backends, config, serve +internal/backend/ Backend interface + Registry + Mock reference impl +internal/prompt/ Style preset registry (embedded styles.yaml) +internal/output/ Filename templating, image writer, JSON sidecar +internal/config/ YAML loader, validation, sample generator +internal/server/ HTTP stub (not implemented yet — follow-up issue) +docs/ architecture.md, usage.md +``` + +Data flow for `imagen generate`: + +1. Parse flags, load config (`internal/config`). +2. Resolve the requested **instance name** to a config block, then the block's + `type` to a registered constructor in `backend.Default`. +3. Apply style preset (`internal/prompt`) to the prompt. +4. Call `backend.Generate(ctx, Request)`. The adapter returns a `*Result` + with an image stream + metadata. +5. Stream to disk via `internal/output`. If `write_metadata_json` is on, a + sidecar `.json` is written next to it. 
+ +## Backend contract + +```go +type Backend interface { + Name() string + Generate(ctx context.Context, req Request) (*Result, error) +} +``` + +`Request` carries the cross-backend fields (prompt, negative, size, steps, +seed, style preset, free-form `BackendOpts`). `Result` returns the image +bytes via an `io.ReadCloser`, the MIME type, and a metadata map (model name, +seed actually used, latency, cost-estimate, …). + +## Adding a new adapter + +1. Create `internal/backend/<name>.go` (e.g. `comfyui.go`). Define a struct + that holds whatever the adapter needs (HTTP client, model id, token). +2. Add a constructor `func New<Name>(name string, cfg map[string]any) (Backend, error)`. + Read fields from `cfg` — that map is the adapter's own block from + `imagen.yaml` minus the `type:` key. Resolve secrets from env vars + (`api_token_env`, `api_key_env`) — never accept tokens inline. +3. Implement `Name()` (return the user-facing instance name) and + `Generate(ctx, Request)`. +4. In `init()` call `Register("<type>", New<Name>)`. +5. Blank-import the package from `cmd/imagen/main.go` if it lives in a + separate package, so the `init()` runs. +6. Add a smoke test under `internal/backend/<name>_test.go`. Network tests + should be guarded by `testing.Short()` or an env var. + +## Config + +`~/.config/imagen.yaml` (override with `--config`). Top-level keys: + +- `default_backend` — instance name used when `--backend` is omitted. +- `output.directory` / `output.naming` / `output.write_metadata_json`. +- `backends:` — map of instance-name → `{type, …adapter-specific…}`. + +The framework parses `type` and stuffs the rest into `BackendSpec.Raw`. The +adapter is free to define any schema it likes inside its block. + +## Credentials + +Never hardcode. Always reference env-var names from the config: + +```yaml +flux-dev-replicate: + type: replicate + api_token_env: REPLICATE_API_TOKEN +``` + +The adapter then `os.Getenv("REPLICATE_API_TOKEN")` at construction and fails +fast if unset. 
Tokens never go through `imagen.yaml` in plaintext. + +## How the `/imagine` skill calls into imagen + +The skill (issue #4) wraps `imagen generate` and post-processes the path it +prints on stdout. Slash-command surface area: + +``` +/imagine "a cat in a fishbowl" --style blog-header --size 1024x1024 +``` + +The skill resolves to `imagen generate "<prompt>" --backend …` and +returns the image path so otto can attach it to a chat reply. + +## References + +- mAi project conventions: `~/.m/docs/msystem.md` +- Backend follow-ups: ImaGen issues #2 (ComfyUI on mRock), #3 (Replicate), #4 (skill) +- mRock GPU: NVIDIA RTX 4070 Ti SUPER, 16 GB VRAM, runs Ollama + F5-TTS + +## House rules + +- No technical debt. No TODOs in landed code. If something can't be done now, + open an issue. +- All user-facing strings: ASCII or proper Unicode (Umlaute), never `ae/oe/ue`. +- Tests live next to the package they cover (`*_test.go`). No `tests/` dir. +- `go build ./...` and `go test ./...` must be clean before any commit. +- Run `task build` (or `make build`) for the full build; both call into + `go build -o bin/imagen ./cmd/imagen`. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..066180b --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +.PHONY: build install test lint clean smoke + +BIN := bin/imagen +PKG := mgit.msbls.de/m/ImaGen/cmd/imagen + +build: + go build -o $(BIN) ./cmd/imagen + +install: + go install ./cmd/imagen + +test: + go test ./... + +lint: + go vet ./... + +smoke: build + ./$(BIN) generate "smoke test" --backend mock --output /tmp/imagen-smoke.png --no-sidecar + @file /tmp/imagen-smoke.png + +clean: + rm -rf bin/ diff --git a/README.md b/README.md index e609690..55ce597 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,50 @@ # ImaGen -Model-agnostic image-generation framework: pluggable backends (local FLUX on mRock, Replicate, DALL-E, …) behind a single CLI/API/skill. 
\ No newline at end of file +Model-agnostic image-generation framework: pluggable backends (local FLUX on +mRock, Replicate, DALL-E, …) behind a single CLI / skill / API. + +``` +imagen generate "a cat in a fishbowl" --backend flux-schnell-local --size 1024x1024 +``` + +See [`CLAUDE.md`](./CLAUDE.md) for the design — backend contract, registry, +config layout, how to add a new adapter. + +## Install + +```sh +go install mgit.msbls.de/m/ImaGen/cmd/imagen@latest +``` + +Or from a checkout: + +```sh +make build # writes ./bin/imagen +make install # installs into $GOBIN (defaults to ~/go/bin) +``` + +## First run + +```sh +mkdir -p ~/.config +imagen config init > ~/.config/imagen.yaml +imagen config validate +imagen backends +imagen generate "test prompt" --backend mock --output /tmp/x.png +``` + +The mock backend ships in this repo and produces a deterministic gradient +PNG — useful for smoke-testing the pipeline without reaching any model. + +## Status + +| Component | Status | +| ----------------------- | ------------- | +| Backend interface | done (#1) | +| Mock backend | done (#1) | +| ComfyUI / FLUX on mRock | open (#2) | +| Replicate adapter | open (#3) | +| `/imagine` skill | open (#4) | +| HTTP server | stubbed (#1) | + +Issues live at <https://mgit.msbls.de/m/ImaGen/issues>. 
diff --git a/cmd/imagen/backends.go b/cmd/imagen/backends.go new file mode 100644 index 0000000..c8d01ad --- /dev/null +++ b/cmd/imagen/backends.go @@ -0,0 +1,64 @@ +package main + +import ( + "flag" + "fmt" + "os" + "sort" + "text/tabwriter" + + "mgit.msbls.de/m/ImaGen/internal/backend" + "mgit.msbls.de/m/ImaGen/internal/config" +) + +// runBackends implements `imagen backends`: it prints one row per backend +// instance from the loaded config or, when no config was loaded, one row per +// registered backend type. The registered type list is also echoed to stderr. +func runBackends(args []string) error { + fs := flag.NewFlagSet("backends", flag.ContinueOnError) + var configPath string + fs.StringVar(&configPath, "config", "", "config file path (default: ~/.config/imagen.yaml)") + if err := fs.Parse(args); err != nil { + return err + } + + // A missing config file is tolerated; the cfg == nil branch below covers it. + cfg, cfgErr := config.Load(configPath) + if cfgErr != nil && !os.IsNotExist(cfgErr) { + return cfgErr + } + + tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "INSTANCE\tTYPE\tSTATUS") + if cfg != nil { + // Go randomises map iteration order; sort the instance names so the + // listing is identical from run to run. + names := make([]string, 0, len(cfg.Backends)) + for name := range cfg.Backends { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + spec := cfg.Backends[name] + status := "registered" + if !backend.Default.Has(spec.Type) { + status = fmt.Sprintf("type %q not compiled in", spec.Type) + } + marker := "" + if name == cfg.DefaultBackend { + marker = " (default)" + } + fmt.Fprintf(tw, "%s%s\t%s\t%s\n", name, marker, spec.Type, status) + } + } + if cfg == nil { + for _, t := range backend.Default.Types() { + fmt.Fprintf(tw, "%s\t%s\t%s\n", t, t, "no config — type registered, no instance defined") + } + } + if err := tw.Flush(); err != nil { + return err + } + fmt.Fprintln(os.Stderr, "registered types:", backend.Default.Types()) + return nil +} diff --git a/cmd/imagen/config.go b/cmd/imagen/config.go new file mode 100644 index 0000000..617ab1a --- /dev/null +++ b/cmd/imagen/config.go @@ -0,0 +1,57 @@ +package main + +import ( + "flag" + "fmt" + "os" + + "mgit.msbls.de/m/ImaGen/internal/config" +) + +func runConfig(args []string) error { + if len(args) < 1 { + return userErr("usage: imagen config <init|validate|path>") + } + switch args[0] { + case "init": + fmt.Print(config.Sample) + return nil + case "path": + p, err := config.DefaultPath() + if err != nil { + return err + } 
+ fmt.Println(p) + return nil + case "validate": + fs := flag.NewFlagSet("config validate", flag.ContinueOnError) + var path string + fs.StringVar(&path, "config", "", "config file path (default: ~/.config/imagen.yaml)") + if err := fs.Parse(args[1:]); err != nil { + return err + } + cfg, err := config.Load(path) + if err != nil { + if os.IsNotExist(err) { + return userErr("no config file at %[1]s — run `imagen config init > %[1]s` first", configPathOrDefault(path)) + } + return err + } + fmt.Fprintf(os.Stdout, "OK — %d backend(s) defined, default=%q\n", + len(cfg.Backends), cfg.DefaultBackend) + return nil + default: + return userErr("unknown config subcommand %q (init|validate|path)", args[0]) + } +} + +func configPathOrDefault(p string) string { + if p != "" { + return p + } + d, err := config.DefaultPath() + if err != nil { + return "~/.config/imagen.yaml" + } + return d +} diff --git a/cmd/imagen/generate.go b/cmd/imagen/generate.go new file mode 100644 index 0000000..fe6a3d8 --- /dev/null +++ b/cmd/imagen/generate.go @@ -0,0 +1,210 @@ +package main + +import ( + "context" + "flag" + "fmt" + "os" + "strconv" + "strings" + + "mgit.msbls.de/m/ImaGen/internal/backend" + "mgit.msbls.de/m/ImaGen/internal/config" + "mgit.msbls.de/m/ImaGen/internal/output" + "mgit.msbls.de/m/ImaGen/internal/prompt" +) + +func runGenerate(ctx context.Context, args []string) error { + fs := flag.NewFlagSet("generate", flag.ContinueOnError) + var ( + backendName string + size string + outPath string + seed int64 + steps int + style string + negative string + configPath string + noSidecar bool + ) + fs.StringVar(&backendName, "backend", "", "backend instance name (default: config.default_backend)") + fs.StringVar(&size, "size", "1024x1024", "WxH, e.g. 
1024x1024") + fs.StringVar(&outPath, "output", "", "explicit output path (overrides config naming template)") + fs.Int64Var(&seed, "seed", 0, "deterministic seed (0 = backend default)") + fs.IntVar(&steps, "steps", 0, "diffusion steps (0 = backend default)") + fs.StringVar(&style, "style", "", "style preset name (see imagen config init for the list)") + fs.StringVar(&negative, "negative", "", "negative prompt (ignored by backends that don't support it)") + fs.StringVar(&configPath, "config", "", "config file path (default: ~/.config/imagen.yaml)") + fs.BoolVar(&noSidecar, "no-sidecar", false, "skip the JSON sidecar even if config enables it") + fs.Usage = func() { + fmt.Fprintln(fs.Output(), `Usage: imagen generate "<prompt>" [flags]`) + fs.PrintDefaults() + } + // stdlib flag stops parsing at the first non-flag arg, so split the + // prompt (leading positional args) from the flags ourselves before parsing. + leadingPositional, flagArgs := splitLeadingPositional(args) + if err := fs.Parse(flagArgs); err != nil { + return err + } + positional := append(leadingPositional, fs.Args()...) 
+ if len(positional) == 0 { + fs.Usage() + return userErr("missing prompt") + } + rawPrompt := strings.Join(positional, " ") + + w, h, err := parseSize(size) + if err != nil { + return userErr("bad --size: %v", err) + } + + cfg, cfgErr := config.Load(configPath) + if cfgErr != nil && !os.IsNotExist(cfgErr) { + return cfgErr + } + + if backendName == "" { + if cfg != nil { + backendName = cfg.DefaultBackend + } + } + if backendName == "" { + return userErr("no --backend given and no default_backend in config") + } + + be, err := buildBackend(cfg, backendName) + if err != nil { + return err + } + + finalPrompt, err := prompt.Apply(rawPrompt, style) + if err != nil { + return userErr("%v", err) + } + + req := backend.Request{ + Prompt: finalPrompt, + NegativePrompt: negative, + Width: w, + Height: h, + Steps: steps, + Seed: seed, + Style: style, + } + res, err := be.Generate(ctx, req) + if err != nil { + return fmt.Errorf("backend %q: %w", backendName, err) + } + defer res.ImageReader.Close() + + writer := buildWriter(cfg, noSidecar) + in := output.Inputs{ + Prompt: rawPrompt, + Backend: be.Name(), + Seed: seedFromMetadata(res.Metadata, seed), + Ext: extFromMime(res.MimeType), + Metadata: res.Metadata, + } + var paths *output.Outputs + if outPath != "" { + paths, err = writer.WriteToPath(res.ImageReader, outPath, in) + } else { + paths, err = writer.Write(res.ImageReader, in) + } + if err != nil { + return err + } + fmt.Println(paths.ImagePath) + if paths.SidecarPath != "" { + fmt.Fprintln(os.Stderr, "sidecar:", paths.SidecarPath) + } + return nil +} + +// splitLeadingPositional separates the positional args at the start of args +// from the rest (which begins with the first flag). A literal "--" terminator +// pushes everything after it into the positional list and out of flag parsing. 
+func splitLeadingPositional(args []string) (positional, flags []string) { + for i, a := range args { + if a == "--" { + return append(positional, args[i+1:]...), flags + } + if strings.HasPrefix(a, "-") { + return positional, args[i:] + } + positional = append(positional, a) + } + return positional, flags +} + +// parseSize parses a "WxH" string such as "1024x768" into width and height. +// Negative dimensions are rejected here; zero is passed through because +// backend.Request documents zero values as "use backend default". +func parseSize(s string) (int, int, error) { + parts := strings.SplitN(s, "x", 2) + if len(parts) != 2 { + return 0, 0, fmt.Errorf("expected WxH, got %q", s) + } + w, err := strconv.Atoi(parts[0]) + if err != nil { + return 0, 0, fmt.Errorf("invalid width %q", parts[0]) + } + h, err := strconv.Atoi(parts[1]) + if err != nil { + return 0, 0, fmt.Errorf("invalid height %q", parts[1]) + } + if w < 0 || h < 0 { + return 0, 0, fmt.Errorf("width and height must not be negative, got %dx%d", w, h) + } + return w, h, nil +} + +// buildBackend resolves name to a Backend: a config instance called name wins, +// otherwise name is tried as a registered type with no config block. +// Constructor failures are wrapped as `backend "X": …`, the form +// docs/architecture.md describes for adapters that fail fast. +func buildBackend(cfg *config.Config, name string) (backend.Backend, error) { + if cfg != nil { + spec, ok := cfg.Backends[name] + if ok { + be, err := backend.Default.Build(spec.Type, name, spec.Raw) + if err != nil { + return nil, fmt.Errorf("backend %q: %w", name, err) + } + return be, nil + } + } + if backend.Default.Has(name) { + be, err := backend.Default.Build(name, name, nil) + if err != nil { + return nil, fmt.Errorf("backend %q: %w", name, err) + } + return be, nil + } + return nil, userErr("backend %q not found in config and not a registered type (registered types: %v)", + name, backend.Default.Types()) +} + +func buildWriter(cfg *config.Config, noSidecar bool) *output.Writer { + w := &output.Writer{} + if cfg != nil { + w.Directory = config.ExpandPath(cfg.Output.Directory) + w.NameTemplate = cfg.Output.Naming + w.WriteSidecar = cfg.Output.WriteMetadataJSON + } + if w.Directory == "" { + w.Directory = "." 
+ } + if noSidecar { + w.WriteSidecar = false + } + return w +} + +func seedFromMetadata(meta map[string]any, fallback int64) int64 { + if v, ok := meta["seed"]; ok { + switch n := v.(type) { + case int64: + return n + case int: + return int64(n) + case float64: + return int64(n) + } + } + return fallback +} + +// extFromMime maps a MIME type to a file extension without the dot. An empty +// type is treated as PNG and unknown types fall back to "bin". MIME types are +// case-insensitive and may carry parameters, so both are normalised away first. +func extFromMime(mime string) string { + base := strings.SplitN(mime, ";", 2)[0] + switch strings.ToLower(strings.TrimSpace(base)) { + case "image/png", "": + return "png" + case "image/jpeg", "image/jpg": + return "jpg" + case "image/webp": + return "webp" + } + return "bin" +} diff --git a/cmd/imagen/main.go b/cmd/imagen/main.go new file mode 100644 index 0000000..d62c794 --- /dev/null +++ b/cmd/imagen/main.go @@ -0,0 +1,91 @@ +// Command imagen is the model-agnostic image-generation CLI. It dispatches +// `generate`, `backends`, and `config` subcommands against backends that +// register themselves at package init time. +package main + +import ( + "context" + "errors" + "flag" + "fmt" + "os" + "os/signal" + "syscall" + + _ "mgit.msbls.de/m/ImaGen/internal/backend" +) + +const usage = `imagen — model-agnostic image generation + +Usage: + imagen generate <prompt> [flags] generate one image + imagen backends list registered backend types + imagen config init print a sample imagen.yaml on stdout + imagen config validate validate the active config + imagen config path print the resolved config path + imagen serve [--addr :8080] (stub) start the HTTP server + imagen version print version + imagen help show this help + +Run "imagen <subcommand> --help" for subcommand-specific flags. +` + +// Version is overridable at link time via -ldflags '-X main.Version=...'.
+var Version = "dev" + +// main dispatches os.Args[1] to the matching subcommand. Exit status is 0 on +// success (including -h/--help), 2 for usage errors (*userError, missing or +// unknown subcommand) and 1 for everything else. +func main() { + if len(os.Args) < 2 { + fmt.Fprint(os.Stderr, usage) + os.Exit(2) + } + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + + args := os.Args[2:] + var err error + switch os.Args[1] { + case "generate": + err = runGenerate(ctx, args) + case "backends": + err = runBackends(args) + case "config": + err = runConfig(args) + case "serve": + // runServe takes no context, so release the signal hook first: with it + // installed, SIGINT/SIGTERM would only cancel ctx and the blocking + // server could not be stopped from the terminal. + cancel() + err = runServe(args) + case "version", "-v", "--version": + fmt.Println(Version) + case "help", "-h", "--help": + fmt.Print(usage) + default: + fmt.Fprintf(os.Stderr, "imagen: unknown subcommand %q\n\n%s", os.Args[1], usage) + os.Exit(2) + } + if err != nil { + // -h/--help on a subcommand: the FlagSet has already printed its usage, + // so treat it as a successful run instead of reporting an error. + if errors.Is(err, flag.ErrHelp) { + return + } + fmt.Fprintln(os.Stderr, "imagen:", err) + var u *userError + if errors.As(err, &u) { + os.Exit(2) + } + os.Exit(1) + } +} + +// userError signals "user did the wrong thing" so we exit 2 rather than 1. +type userError struct{ msg string } + +func (u *userError) Error() string { return u.msg } +func userErr(format string, a ...any) error { + return &userError{msg: fmt.Sprintf(format, a...)} +} diff --git a/cmd/imagen/serve.go b/cmd/imagen/serve.go new file mode 100644 index 0000000..27ed789 --- /dev/null +++ b/cmd/imagen/serve.go @@ -0,0 +1,31 @@ +package main + +import ( + "flag" + "fmt" + "net/http" + "time" + + "mgit.msbls.de/m/ImaGen/internal/server" +) + +// runServe implements `imagen serve`: it serves the internal/server stub +// handler on --addr and blocks until the listener returns an error. +func runServe(args []string) error { + fs := flag.NewFlagSet("serve", flag.ContinueOnError) + var addr string + fs.StringVar(&addr, "addr", ":8080", "listen address") + if err := fs.Parse(args); err != nil { + return err + } + handler := server.NotImplemented{}.Handler() + fmt.Fprintf(fs.Output(), "imagen serve: stub responding 501 on %s — see internal/server for status\n", addr) + // http.ListenAndServe sets no timeouts at all; bound the header read so a + // stalled client cannot hold a connection open indefinitely. + srv := &http.Server{ + Addr: addr, + Handler: handler, + ReadHeaderTimeout: 10 * time.Second, + } + return srv.ListenAndServe() +} diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..4b4960f --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,110 @@ +# ImaGen architecture + +ImaGen is intentionally small. 
The framework owns plumbing; adapters own the +upstream API. Each adapter only ever sees its own slice of `imagen.yaml`. + +## Layers + +``` + ┌───────────────────────┐ + │ cmd/imagen │ CLI dispatch + │ (or HTTP server) │ + └──────────┬────────────┘ + │ + ┌──────────▼────────────┐ + │ internal/prompt │ style preset → prompt suffix + │ internal/output │ filename templating, sidecar + │ internal/config │ YAML loader, validation + └──────────┬────────────┘ + │ + ┌──────────▼────────────┐ + │ internal/backend │ Backend interface + Registry + └──────────┬────────────┘ + │ + ┌──────────▼────────────┐ + │ adapters │ ComfyUI · Replicate · OpenAI · … + │ (each one register- │ each registers a `type` name on + │ s in init()) │ `backend.Default` at init time. + └───────────────────────┘ +``` + +## The Backend contract + +```go +type Request struct { + Prompt string + NegativePrompt string + Width, Height int + Steps int + Seed int64 + Style string + BackendOpts map[string]any +} + +type Result struct { + ImageReader io.ReadCloser + MimeType string + Metadata map[string]any +} + +type Backend interface { + Name() string + Generate(ctx context.Context, req Request) (*Result, error) +} +``` + +Adapters translate `Request` into whatever the upstream expects. Fields they +can't honour (e.g. `NegativePrompt` on DALL-E) are silently ignored. + +## Registry + +`backend.Default` holds the process-wide name → constructor map. Each adapter +calls `backend.Register("<type>", NewX)` from its `init()`. The CLI imports +`internal/backend` (which also runs the mock's `init()`) and any +extra adapter packages. + +## Config flow + +``` +imagen.yaml + backends: + flux-schnell-local: + type: comfyui ──┐ + base_url: http://mrock:8188 │ framework keeps `type`, + model: flux1-schnell.safetensors │ hands the rest to the + default_steps: 4 │ comfyui adapter as cfg map[string]any + ──┘ +``` + +The framework never inspects fields below `type`. 
That's the adapter's +contract with itself, expressed however the adapter wants (typed struct, +map lookups, JSON tags — its call). + +## Output + +``` +output: + directory: ~/Pictures/imagen + naming: "{date}-{slug}-{seed}.png" + write_metadata_json: true +``` + +Placeholders: `{date}`, `{time}`, `{slug}` (lowercased prompt, alnum-only, +truncated to 40 chars), `{seed}`, `{backend}`, `{ext}`. The sidecar JSON +contains the prompt, backend instance name, seed, ISO timestamp, and the +`Result.Metadata` map verbatim. + +## Where adapters fail fast + +- Missing required field in their config block — return an error from the + constructor; the CLI surfaces it as `imagen: backend "X": <error>`. +- Unset env-var for credentials — same. +- Network errors during `Generate` — wrap and return; no retry policy yet + (decide per-adapter, or move to a shared retry helper if a pattern emerges). + +## Out of scope (today) + +- Image post-processing (cropping, watermarking). +- Cost-tracking (lands with the Replicate adapter, since only API backends bill). +- Multi-image `n>1` per request — backends that support it can expose it via + `BackendOpts`; the framework doesn't have a first-class field yet. 
diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..21df79e --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,73 @@ +# Using imagen + +## Subcommands + +``` +imagen generate <prompt> [flags] generate one image +imagen backends list configured + registered backends +imagen config init print a sample imagen.yaml on stdout +imagen config validate parse + validate the active config +imagen config path print the resolved config path +imagen serve [--addr :8080] (stub) start the HTTP server +imagen version print version +``` + +## `generate` flags + +| Flag | Default | Notes | +| -------------- | ---------------------------- | ----------------------------------------------------------- | +| `--backend` | `default_backend` from config | Instance name from `imagen.yaml` | +| `--size` | `1024x1024` | `WxH` | +| `--seed` | `0` (= backend default) | | +| `--steps` | `0` (= backend default) | | +| `--style` | empty | One of `imagen config init`'s style names | +| `--negative` | empty | Negative prompt (ignored by some adapters) | +| `--output` | empty (= use naming template) | Explicit path | +| `--no-sidecar` | `false` | Skip the JSON sidecar even if config enables it | +| `--config` | `~/.config/imagen.yaml` | Override config path | + +## Examples + +```sh +# Quick smoke test — mock backend ships in-tree +imagen generate "test" --backend mock --output /tmp/x.png + +# Real generation, FLUX-schnell on mRock via ComfyUI +imagen generate "a wide editorial blog header about RAG systems" \ + --backend flux-schnell-local \ + --style blog-header \ + --size 1536x768 + +# Explicit seed for reproducibility +imagen generate "a cat in a fishbowl" --backend mock --seed 42 --output /tmp/cat.png +``` + +## Config + +A complete sample is in `imagen config init`. Adapters get only their own +sub-block — see [`../CLAUDE.md`](../CLAUDE.md) for the contract. 
+ +## Naming template + +`output.naming` placeholders: + +| Placeholder | Replaced with | +| ----------- | ---------------------------------------- | +| `{date}` | `2026-05-08` | +| `{time}` | `143015` (no separators) | +| `{slug}` | lowercased ASCII prompt, ≤ 40 chars | +| `{seed}` | seed actually used | +| `{backend}` | backend instance name | +| `{ext}` | file extension matching `Result.MimeType` | + +Unknown placeholders are left literal. + +## Credentials + +API-backed adapters read tokens from env vars referenced by the config +(`api_token_env`, `api_key_env`). Never put a token in `imagen.yaml`. + +```sh +export REPLICATE_API_TOKEN=... +imagen generate "a cat" --backend flux-dev-replicate +``` diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..7c84c19 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module mgit.msbls.de/m/ImaGen + +go 1.24 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..a62c313 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/backend/backend.go b/internal/backend/backend.go new file mode 100644 index 0000000..6bbdefd --- /dev/null +++ b/internal/backend/backend.go @@ -0,0 +1,37 @@ +// Package backend defines the model-agnostic contract every image-generation +// adapter must satisfy. The framework speaks only through Backend; concrete +// adapters (ComfyUI, Replicate, OpenAI, …) translate Request into whatever +// the upstream API expects and return a Result. +package backend + +import ( + "context" + "io" +) + +// Request is the cross-backend request shape. 
Adapters translate it +// to whatever their target API expects. Zero values mean "use backend default" +// unless documented otherwise. +type Request struct { + Prompt string + NegativePrompt string + Width, Height int + Steps int + Seed int64 + Style string + BackendOpts map[string]any +} + +// Result is what the backend produces. The caller is responsible for closing +// ImageReader. +type Result struct { + ImageReader io.ReadCloser + MimeType string + Metadata map[string]any +} + +// Backend is the interface every adapter satisfies. +type Backend interface { + Name() string + Generate(ctx context.Context, req Request) (*Result, error) +} diff --git a/internal/backend/backend_test.go b/internal/backend/backend_test.go new file mode 100644 index 0000000..94c12e5 --- /dev/null +++ b/internal/backend/backend_test.go @@ -0,0 +1,93 @@ +package backend + +import ( + "bytes" + "context" + "image/png" + "io" + "testing" +) + +func TestMockGeneratesValidPNG(t *testing.T) { + be, err := NewMock("mock", nil) + if err != nil { + t.Fatalf("NewMock: %v", err) + } + if be.Name() != "mock" { + t.Errorf("Name() = %q", be.Name()) + } + res, err := be.Generate(context.Background(), Request{ + Prompt: "test prompt", + Width: 64, + Height: 64, + Seed: 1234, + }) + if err != nil { + t.Fatalf("Generate: %v", err) + } + defer res.ImageReader.Close() + if res.MimeType != "image/png" { + t.Errorf("mime = %q", res.MimeType) + } + body, err := io.ReadAll(res.ImageReader) + if err != nil { + t.Fatalf("read body: %v", err) + } + img, err := png.Decode(bytes.NewReader(body)) + if err != nil { + t.Fatalf("decode png: %v", err) + } + if img.Bounds().Dx() != 64 || img.Bounds().Dy() != 64 { + t.Errorf("dims = %v", img.Bounds()) + } + if seed, ok := res.Metadata["seed"].(int64); !ok || seed != 1234 { + t.Errorf("metadata seed = %v", res.Metadata["seed"]) + } +} + +func TestMockDeterministicBySeed(t *testing.T) { + be, _ := NewMock("mock", nil) + gen := func() []byte { + res, err := 
be.Generate(context.Background(), Request{Prompt: "p", Width: 32, Height: 32, Seed: 99}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + defer res.ImageReader.Close() + b, _ := io.ReadAll(res.ImageReader) + return b + } + a := gen() + b := gen() + if !bytes.Equal(a, b) { + t.Errorf("same seed produced different images: %d vs %d bytes", len(a), len(b)) + } +} + +func TestRegistryBuildAndUnknown(t *testing.T) { + r := NewRegistry() + r.Register("mock", NewMock) + if !r.Has("mock") { + t.Errorf("Has(mock) = false") + } + be, err := r.Build("mock", "instance-1", nil) + if err != nil { + t.Fatalf("Build: %v", err) + } + if be.Name() != "instance-1" { + t.Errorf("Name() = %q", be.Name()) + } + if _, err := r.Build("nope", "x", nil); err == nil { + t.Errorf("expected error for unknown type") + } +} + +func TestRegistryDuplicatePanic(t *testing.T) { + r := NewRegistry() + r.Register("dup", NewMock) + defer func() { + if recover() == nil { + t.Errorf("expected panic on duplicate Register") + } + }() + r.Register("dup", NewMock) +} diff --git a/internal/backend/mock.go b/internal/backend/mock.go new file mode 100644 index 0000000..5afe083 --- /dev/null +++ b/internal/backend/mock.go @@ -0,0 +1,116 @@ +package backend + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/binary" + "fmt" + "image" + "image/color" + "image/png" + "io" + "math/rand" + "time" +) + +// Mock is a deterministic image generator used for tests, smoke checks and as +// the reference implementation of the Backend contract. Same seed + same prompt +// always yields the same PNG. It does not call the network. +type Mock struct { + instance string +} + +// NewMock builds a Mock backend. cfg is accepted for symmetry with real +// adapters but is ignored. +func NewMock(name string, _ map[string]any) (Backend, error) { + if name == "" { + name = "mock" + } + return &Mock{instance: name}, nil +} + +// Name returns the user-facing instance name. 
+func (m *Mock) Name() string { return m.instance } + +// Generate paints a deterministic gradient sized to req.Width×req.Height and +// returns it as a PNG. Width/Height default to 256 when zero. +func (m *Mock) Generate(ctx context.Context, req Request) (*Result, error) { + w, h := req.Width, req.Height + if w == 0 { + w = 256 + } + if h == 0 { + h = 256 + } + if w < 1 || h < 1 || w > 8192 || h > 8192 { + return nil, fmt.Errorf("mock: invalid size %dx%d", w, h) + } + + seed := req.Seed + if seed == 0 { + seed = derivedSeed(req.Prompt) + } + rng := rand.New(rand.NewSource(seed)) + baseR := uint8(rng.Intn(256)) + baseG := uint8(rng.Intn(256)) + baseB := uint8(rng.Intn(256)) + + start := time.Now() + img := image.NewRGBA(image.Rect(0, 0, w, h)) + for y := 0; y < h; y++ { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + for x := 0; x < w; x++ { + fx := float64(x) / float64(w) + fy := float64(y) / float64(h) + img.Set(x, y, color.RGBA{ + R: blend(baseR, fx), + G: blend(baseG, fy), + B: blend(baseB, (fx+fy)/2), + A: 255, + }) + } + } + var buf bytes.Buffer + if err := png.Encode(&buf, img); err != nil { + return nil, fmt.Errorf("mock: encode png: %w", err) + } + return &Result{ + ImageReader: io.NopCloser(&buf), + MimeType: "image/png", + Metadata: map[string]any{ + "backend": m.instance, + "backend_type": "mock", + "seed": seed, + "width": w, + "height": h, + "latency_ms": time.Since(start).Milliseconds(), + }, + }, nil +} + +func blend(base uint8, f float64) uint8 { + v := float64(base) + (255-float64(base))*f + if v < 0 { + v = 0 + } + if v > 255 { + v = 255 + } + return uint8(v) +} + +// derivedSeed produces a stable int64 seed from a prompt so tests are +// reproducible without forcing the caller to pick one. 
+func derivedSeed(prompt string) int64 { + sum := sha256.Sum256([]byte(prompt)) + return int64(binary.BigEndian.Uint64(sum[:8]) >> 1) +} + +func init() { + Register("mock", NewMock) +} diff --git a/internal/backend/registry.go b/internal/backend/registry.go new file mode 100644 index 0000000..0d23756 --- /dev/null +++ b/internal/backend/registry.go @@ -0,0 +1,75 @@ +package backend + +import ( + "fmt" + "sort" + "sync" +) + +// Constructor builds a Backend from a name and its sub-block of raw config. +// The framework hands the adapter only its own slice of imagen.yaml, so the +// adapter owns its schema completely. +type Constructor func(name string, cfg map[string]any) (Backend, error) + +// Registry holds the name → Constructor table. Adapters call Register from +// their package init() to make themselves available to the CLI. +type Registry struct { + mu sync.RWMutex + ctors map[string]Constructor +} + +// NewRegistry returns an empty registry. +func NewRegistry() *Registry { + return &Registry{ctors: make(map[string]Constructor)} +} + +// Register adds a constructor under typeName (e.g. "comfyui", "mock"). +// Re-registering an existing type panics — names are global per binary. +func (r *Registry) Register(typeName string, ctor Constructor) { + r.mu.Lock() + defer r.mu.Unlock() + if _, exists := r.ctors[typeName]; exists { + panic(fmt.Sprintf("backend type %q already registered", typeName)) + } + r.ctors[typeName] = ctor +} + +// Build instantiates a backend of typeName using cfg. instanceName is the +// user-facing name from imagen.yaml (e.g. "flux-schnell-local"). +func (r *Registry) Build(typeName, instanceName string, cfg map[string]any) (Backend, error) { + r.mu.RLock() + ctor, ok := r.ctors[typeName] + r.mu.RUnlock() + if !ok { + return nil, fmt.Errorf("backend type %q not registered, available: %v", typeName, r.Types()) + } + return ctor(instanceName, cfg) +} + +// Types returns the registered backend type names, sorted. 
+func (r *Registry) Types() []string { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]string, 0, len(r.ctors)) + for k := range r.ctors { + out = append(out, k) + } + sort.Strings(out) + return out +} + +// Has reports whether typeName is registered. +func (r *Registry) Has(typeName string) bool { + r.mu.RLock() + defer r.mu.RUnlock() + _, ok := r.ctors[typeName] + return ok +} + +// Default is the process-wide registry adapters register against. +var Default = NewRegistry() + +// Register is shorthand for Default.Register. +func Register(typeName string, ctor Constructor) { + Default.Register(typeName, ctor) +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..973b087 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,143 @@ +// Package config loads ~/.config/imagen.yaml. The framework knows the global +// shape (default backend + output settings + a per-backend block); each +// adapter owns the schema of its own block. +package config + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// Config is the top-level shape of imagen.yaml. +type Config struct { + DefaultBackend string `yaml:"default_backend"` + Output OutputConfig `yaml:"output"` + Backends map[string]BackendSpec `yaml:"backends"` +} + +// OutputConfig controls where generated images and metadata sidecars land. +type OutputConfig struct { + Directory string `yaml:"directory"` + Naming string `yaml:"naming"` + WriteMetadataJSON bool `yaml:"write_metadata_json"` +} + +// BackendSpec is one entry under `backends:`. Type identifies the adapter; +// the rest is opaque to the framework and handed to the adapter as-is. +type BackendSpec struct { + Type string `yaml:"type"` + Raw map[string]any `yaml:",inline"` +} + +// DefaultPath returns ~/.config/imagen.yaml, honouring XDG_CONFIG_HOME. 
+func DefaultPath() (string, error) { + if x := os.Getenv("XDG_CONFIG_HOME"); x != "" { + return filepath.Join(x, "imagen.yaml"), nil + } + home, err := os.UserHomeDir() + if err != nil { + return "", err + } + return filepath.Join(home, ".config", "imagen.yaml"), nil +} + +// Load reads and validates the config at path. If path is empty the default +// path is used. A missing file returns os.ErrNotExist so callers can decide +// whether to fall back to defaults. +func Load(path string) (*Config, error) { + if path == "" { + p, err := DefaultPath() + if err != nil { + return nil, err + } + path = p + } + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + cfg := &Config{} + if err := yaml.Unmarshal(data, cfg); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("validate %s: %w", path, err) + } + return cfg, nil +} + +// Validate enforces the framework-level invariants. Adapter-level validation +// happens when the adapter constructor runs. +func (c *Config) Validate() error { + if c.DefaultBackend != "" { + if _, ok := c.Backends[c.DefaultBackend]; !ok { + return fmt.Errorf("default_backend %q is not defined under backends:", c.DefaultBackend) + } + } + for name, spec := range c.Backends { + if name == "" { + return errors.New("empty backend name") + } + if spec.Type == "" { + return fmt.Errorf("backend %q is missing a type:", name) + } + } + return nil +} + +// Sample is the canonical example written by `imagen config init`. +const Sample = `# imagen.yaml — config for the imagen CLI. +# Adapters get only their own sub-block at construction. Add a new backend by +# implementing the Backend interface, registering its type name, and listing +# an instance here. 
+ +default_backend: mock + +output: + directory: ~/Pictures/imagen + naming: "{date}-{slug}-{seed}.png" + write_metadata_json: true + +backends: + mock: + type: mock + + flux-schnell-local: + type: comfyui + base_url: http://mrock:8188 + model: flux1-schnell.safetensors + default_steps: 4 + + flux-dev-replicate: + type: replicate + api_token_env: REPLICATE_API_TOKEN + model: black-forest-labs/flux-dev + default_steps: 28 + + dalle3: + type: openai + api_key_env: OPENAI_API_KEY + model: dall-e-3 +` + +// ExpandPath resolves leading ~ to the user's home directory. +func ExpandPath(p string) string { + if p == "" || p[0] != '~' { + return p + } + home, err := os.UserHomeDir() + if err != nil { + return p + } + if len(p) == 1 { + return home + } + if p[1] == '/' { + return filepath.Join(home, p[2:]) + } + return p +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go new file mode 100644 index 0000000..afba5ee --- /dev/null +++ b/internal/config/config_test.go @@ -0,0 +1,70 @@ +package config + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadAndValidate(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "imagen.yaml") + if err := os.WriteFile(path, []byte(Sample), 0o644); err != nil { + t.Fatalf("write sample: %v", err) + } + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.DefaultBackend != "mock" { + t.Errorf("default = %q", cfg.DefaultBackend) + } + mock, ok := cfg.Backends["mock"] + if !ok { + t.Fatalf("mock backend missing") + } + if mock.Type != "mock" { + t.Errorf("mock type = %q", mock.Type) + } + flux, ok := cfg.Backends["flux-schnell-local"] + if !ok { + t.Fatalf("flux backend missing") + } + if flux.Raw["base_url"] != "http://mrock:8188" { + t.Errorf("flux base_url = %v", flux.Raw["base_url"]) + } +} + +func TestValidateRejectsUnknownDefault(t *testing.T) { + c := &Config{ + DefaultBackend: "ghost", + Backends: map[string]BackendSpec{"real": {Type: "mock"}}, + } + if 
err := c.Validate(); err == nil { + t.Errorf("expected error for unknown default_backend") + } +} + +func TestValidateRejectsMissingType(t *testing.T) { + c := &Config{ + Backends: map[string]BackendSpec{"x": {}}, + } + if err := c.Validate(); err == nil { + t.Errorf("expected error for missing type") + } +} + +func TestExpandPath(t *testing.T) { + home, _ := os.UserHomeDir() + cases := map[string]string{ + "": "", + "/abs/path": "/abs/path", + "~": home, + "~/foo/bar": filepath.Join(home, "foo/bar"), + } + for in, want := range cases { + if got := ExpandPath(in); got != want { + t.Errorf("ExpandPath(%q) = %q, want %q", in, got, want) + } + } +} diff --git a/internal/output/output.go b/internal/output/output.go new file mode 100644 index 0000000..878f224 --- /dev/null +++ b/internal/output/output.go @@ -0,0 +1,184 @@ +// Package output writes generated images to disk and (optionally) a JSON +// metadata sidecar. Filenames are resolved through a small template language +// with placeholders {date}, {time}, {slug}, {seed}, {backend}, {ext}. +package output + +import ( + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strings" + "time" +) + +// Writer renders one generation to disk under Directory. +type Writer struct { + Directory string + NameTemplate string + WriteSidecar bool + Now func() time.Time +} + +// Inputs are the ingredients needed to compute a filename and write a sidecar. +type Inputs struct { + Prompt string + Backend string + Seed int64 + Ext string + Metadata map[string]any +} + +// Outputs lists the artefacts the writer produced. +type Outputs struct { + ImagePath string + SidecarPath string +} + +// Write streams img to disk and, if enabled, writes a sidecar. The image +// stream is consumed even on error so callers don't leak goroutines from +// piped readers. 
+func (w *Writer) Write(img io.Reader, in Inputs) (*Outputs, error) { + now := w.now() + ext := in.Ext + if ext == "" { + ext = "png" + } + tmpl := w.NameTemplate + if tmpl == "" { + tmpl = "{date}-{slug}-{seed}.{ext}" + } + name := renderTemplate(tmpl, map[string]string{ + "date": now.Format("2006-01-02"), + "time": now.Format("150405"), + "slug": Slug(in.Prompt), + "seed": fmt.Sprintf("%d", in.Seed), + "backend": in.Backend, + "ext": strings.TrimPrefix(ext, "."), + }) + + dir := w.Directory + if dir == "" { + dir = "." + } + if err := os.MkdirAll(dir, 0o755); err != nil { + return nil, fmt.Errorf("mkdir %s: %w", dir, err) + } + + imagePath := filepath.Join(dir, name) + f, err := os.Create(imagePath) + if err != nil { + return nil, fmt.Errorf("create %s: %w", imagePath, err) + } + if _, err := io.Copy(f, img); err != nil { + f.Close() + return nil, fmt.Errorf("write %s: %w", imagePath, err) + } + if err := f.Close(); err != nil { + return nil, fmt.Errorf("close %s: %w", imagePath, err) + } + + out := &Outputs{ImagePath: imagePath} + + if w.WriteSidecar { + sidecar := imagePath + ".json" + body := map[string]any{ + "timestamp": now.UTC().Format(time.RFC3339), + "prompt": in.Prompt, + "backend": in.Backend, + "seed": in.Seed, + "image": filepath.Base(imagePath), + "metadata": in.Metadata, + } + data, err := json.MarshalIndent(body, "", " ") + if err != nil { + return out, fmt.Errorf("marshal sidecar: %w", err) + } + if err := os.WriteFile(sidecar, append(data, '\n'), 0o644); err != nil { + return out, fmt.Errorf("write sidecar %s: %w", sidecar, err) + } + out.SidecarPath = sidecar + } + return out, nil +} + +// WriteToPath bypasses templating and writes img to an explicit path. This is +// the path the CLI's --output flag uses. 
+func (w *Writer) WriteToPath(img io.Reader, path string, in Inputs) (*Outputs, error) { + now := w.now() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err) + } + f, err := os.Create(path) + if err != nil { + return nil, fmt.Errorf("create %s: %w", path, err) + } + if _, err := io.Copy(f, img); err != nil { + f.Close() + return nil, fmt.Errorf("write %s: %w", path, err) + } + if err := f.Close(); err != nil { + return nil, fmt.Errorf("close %s: %w", path, err) + } + out := &Outputs{ImagePath: path} + if w.WriteSidecar { + sidecar := path + ".json" + body := map[string]any{ + "timestamp": now.UTC().Format(time.RFC3339), + "prompt": in.Prompt, + "backend": in.Backend, + "seed": in.Seed, + "image": filepath.Base(path), + "metadata": in.Metadata, + } + data, err := json.MarshalIndent(body, "", " ") + if err != nil { + return out, fmt.Errorf("marshal sidecar: %w", err) + } + if err := os.WriteFile(sidecar, append(data, '\n'), 0o644); err != nil { + return out, fmt.Errorf("write sidecar %s: %w", sidecar, err) + } + out.SidecarPath = sidecar + } + return out, nil +} + +func (w *Writer) now() time.Time { + if w.Now != nil { + return w.Now() + } + return time.Now() +} + +var ( + tmplPlaceholder = regexp.MustCompile(`\{([a-z]+)\}`) + slugAllowed = regexp.MustCompile(`[^a-z0-9]+`) +) + +func renderTemplate(t string, vars map[string]string) string { + return tmplPlaceholder.ReplaceAllStringFunc(t, func(match string) string { + key := match[1 : len(match)-1] + if v, ok := vars[key]; ok { + return v + } + return match + }) +} + +// Slug normalises a prompt fragment into a filesystem-safe token. 
+func Slug(s string) string { + s = strings.ToLower(s) + s = slugAllowed.ReplaceAllString(s, "-") + s = strings.Trim(s, "-") + if s == "" { + s = "image" + } + const max = 40 + if len(s) > max { + s = s[:max] + s = strings.TrimRight(s, "-") + } + return s +} diff --git a/internal/output/output_test.go b/internal/output/output_test.go new file mode 100644 index 0000000..64e1337 --- /dev/null +++ b/internal/output/output_test.go @@ -0,0 +1,127 @@ +package output + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestSlug(t *testing.T) { + cases := map[string]string{ + "": "image", + " ": "image", + "A Cat in a Fishbowl": "a-cat-in-a-fishbowl", + "!!! weird---input": "weird-input", + "über-cool prompt": "ber-cool-prompt", // ASCII-only by design + strings.Repeat("a", 80): strings.Repeat("a", 40), + } + for in, want := range cases { + if got := Slug(in); got != want { + t.Errorf("Slug(%q) = %q, want %q", in, got, want) + } + } +} + +func TestRenderTemplateAndWrite(t *testing.T) { + dir := t.TempDir() + w := &Writer{ + Directory: dir, + NameTemplate: "{date}-{slug}-{seed}.{ext}", + WriteSidecar: true, + Now: func() time.Time { + return time.Date(2026, 5, 8, 14, 30, 15, 0, time.UTC) + }, + } + body := []byte("PNGbytes") + out, err := w.Write(bytes.NewReader(body), Inputs{ + Prompt: "A cat in a fishbowl", + Backend: "mock", + Seed: 42, + Ext: "png", + Metadata: map[string]any{"foo": "bar"}, + }) + if err != nil { + t.Fatalf("Write: %v", err) + } + want := filepath.Join(dir, "2026-05-08-a-cat-in-a-fishbowl-42.png") + if out.ImagePath != want { + t.Errorf("image path = %q, want %q", out.ImagePath, want) + } + gotBody, err := os.ReadFile(out.ImagePath) + if err != nil { + t.Fatalf("read image: %v", err) + } + if !bytes.Equal(gotBody, body) { + t.Errorf("image body mismatch") + } + if out.SidecarPath == "" { + t.Fatal("sidecar path empty") + } + sc, err := os.ReadFile(out.SidecarPath) + if err != nil { + t.Fatalf("read 
sidecar: %v", err) + } + var parsed map[string]any + if err := json.Unmarshal(sc, &parsed); err != nil { + t.Fatalf("sidecar json: %v\n%s", err, sc) + } + if parsed["prompt"] != "A cat in a fishbowl" { + t.Errorf("sidecar prompt = %v", parsed["prompt"]) + } + if parsed["backend"] != "mock" { + t.Errorf("sidecar backend = %v", parsed["backend"]) + } + if parsed["timestamp"] != "2026-05-08T14:30:15Z" { + t.Errorf("sidecar timestamp = %v", parsed["timestamp"]) + } + meta, ok := parsed["metadata"].(map[string]any) + if !ok || meta["foo"] != "bar" { + t.Errorf("sidecar metadata = %v", parsed["metadata"]) + } +} + +func TestWriteSkipSidecarWhenDisabled(t *testing.T) { + dir := t.TempDir() + w := &Writer{Directory: dir, WriteSidecar: false} + out, err := w.Write(bytes.NewReader([]byte("x")), Inputs{Prompt: "p", Backend: "b", Seed: 1, Ext: "png"}) + if err != nil { + t.Fatalf("Write: %v", err) + } + if out.SidecarPath != "" { + t.Errorf("sidecar path = %q, want empty", out.SidecarPath) + } +} + +func TestUnknownPlaceholderPassesThrough(t *testing.T) { + got := renderTemplate("{date}-{nonsense}-{seed}", map[string]string{ + "date": "2026-05-08", "seed": "1", + }) + if got != "2026-05-08-{nonsense}-1" { + t.Errorf("got %q", got) + } +} + +func TestWriteToPath(t *testing.T) { + dir := t.TempDir() + target := filepath.Join(dir, "explicit.png") + w := &Writer{WriteSidecar: true} + out, err := w.WriteToPath(bytes.NewReader([]byte("z")), target, Inputs{ + Prompt: "p", Backend: "b", Seed: 7, Ext: "png", + }) + if err != nil { + t.Fatalf("WriteToPath: %v", err) + } + if out.ImagePath != target { + t.Errorf("image path = %q, want %q", out.ImagePath, target) + } + if _, err := os.Stat(target); err != nil { + t.Errorf("expected %s to exist: %v", target, err) + } + if _, err := os.Stat(target + ".json"); err != nil { + t.Errorf("expected sidecar to exist: %v", err) + } +} diff --git a/internal/prompt/prompt.go b/internal/prompt/prompt.go new file mode 100644 index 0000000..86eb0e7 --- 
/dev/null +++ b/internal/prompt/prompt.go @@ -0,0 +1,63 @@ +// Package prompt enriches a raw user prompt before it reaches a backend. The +// only enrichment today is style-preset suffixes; future passes can add +// negative-prompt resolution, safety filters, etc. +package prompt + +import ( + _ "embed" + "fmt" + "sort" + "strings" + + "gopkg.in/yaml.v3" +) + +//go:embed styles.yaml +var stylesYAML []byte + +type stylesFile struct { + Styles map[string]string `yaml:"styles"` +} + +var defaultStyles map[string]string + +func init() { + var f stylesFile + if err := yaml.Unmarshal(stylesYAML, &f); err != nil { + panic(fmt.Sprintf("prompt: parse embedded styles.yaml: %v", err)) + } + defaultStyles = f.Styles +} + +// Styles returns the names of registered style presets, sorted. +func Styles() []string { + out := make([]string, 0, len(defaultStyles)) + for k := range defaultStyles { + out = append(out, k) + } + sort.Strings(out) + return out +} + +// HasStyle reports whether name is a known preset. +func HasStyle(name string) bool { + _, ok := defaultStyles[name] + return ok +} + +// Apply returns the prompt with the named style preset appended. Unknown +// styles return an error rather than silently passing the prompt through — +// surprising the caller with missing style is worse than a hard error. 
+func Apply(prompt, style string) (string, error) { + if style == "" { + return prompt, nil + } + suffix, ok := defaultStyles[style] + if !ok { + return "", fmt.Errorf("unknown style %q, available: %v", style, Styles()) + } + if strings.TrimSpace(prompt) == "" { + return suffix, nil + } + return prompt + ", " + suffix, nil +} diff --git a/internal/prompt/prompt_test.go b/internal/prompt/prompt_test.go new file mode 100644 index 0000000..2af8f00 --- /dev/null +++ b/internal/prompt/prompt_test.go @@ -0,0 +1,50 @@ +package prompt + +import "testing" + +func TestApplyKnownStyle(t *testing.T) { + got, err := Apply("a cat", "photo") + if err != nil { + t.Fatalf("Apply: %v", err) + } + want := "a cat, photorealistic, sharp focus, natural lighting" + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestApplyEmptyStylePassThrough(t *testing.T) { + got, err := Apply("a cat", "") + if err != nil || got != "a cat" { + t.Errorf("got (%q,%v)", got, err) + } +} + +func TestApplyUnknownStyleErrors(t *testing.T) { + if _, err := Apply("a cat", "nonsense"); err == nil { + t.Errorf("expected error for unknown style") + } +} + +func TestApplyToEmptyPromptUsesPresetOnly(t *testing.T) { + got, err := Apply("", "photo") + if err != nil { + t.Fatalf("Apply: %v", err) + } + if got == "" || got[0] == ',' { + t.Errorf("unexpected output %q", got) + } +} + +func TestStylesContainsAllExpected(t *testing.T) { + want := []string{"blog-header", "diagram", "illustration", "photo", "sketch"} + got := Styles() + if len(got) != len(want) { + t.Fatalf("Styles() = %v, want %v", got, want) + } + for i, w := range want { + if got[i] != w { + t.Errorf("Styles()[%d] = %q, want %q", i, got[i], w) + } + } +} diff --git a/internal/prompt/styles.yaml b/internal/prompt/styles.yaml new file mode 100644 index 0000000..8efd878 --- /dev/null +++ b/internal/prompt/styles.yaml @@ -0,0 +1,6 @@ +styles: + photo: "photorealistic, sharp focus, natural lighting" + illustration: "digital 
illustration, clean lines, vibrant colors" + diagram: "minimal technical diagram, isometric, white background, line-art" + sketch: "rough pencil sketch, hand-drawn, monochrome" + blog-header: "wide aspect, conceptual, soft palette, editorial illustration" diff --git a/internal/server/server.go b/internal/server/server.go new file mode 100644 index 0000000..ed34597 --- /dev/null +++ b/internal/server/server.go @@ -0,0 +1,31 @@ +// Package server is a placeholder for the HTTP surface that lets non-Go +// callers (skills, agents, otto) drive imagen without exec-ing the CLI. +// +// The CLI already covers the v0 use cases, so this package intentionally +// ships only an interface and a 501 stub. A follow-up issue (tracked after +// the ComfyUI + Replicate adapters land) will flesh it out. +package server + +import ( + "fmt" + "net/http" +) + +// Server is the eventual HTTP shape. Concrete implementations can wrap a +// backend.Registry + config.Config and expose POST /v1/generate. +type Server interface { + Handler() http.Handler +} + +// NotImplemented is the placeholder the CLI wires up if someone calls +// `imagen serve` before the real server lands. +type NotImplemented struct{} + +// Handler returns an http.Handler that responds 501 to every request. +func (NotImplemented) Handler() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.WriteHeader(http.StatusNotImplemented) + fmt.Fprintln(w, "imagen HTTP server: not implemented yet — use the CLI for now") + }) +}