Go daemon listening on :8770 that fronts mvoice (8766), whisper-server
(8178), ollama (11434), comfyui (8188) behind a single /v1 façade.
What this MVP does:
- Loads config/consumers.yaml: routing table, per-consumer URL + health +
paths + vram_resident_mib + can_coexist_with + load/unload routes.
- Background health probe (5s) on every consumer; refuses fast with a
structured 503 if the last probe failed (no Felix-Banholzer-style
silent fallback).
- POST /v1/{tts,stt,llm,image} proxies the request body + Content-Type
to the routed consumer's path and streams the response back.
- GET /audio/* proxies to audio_proxy consumer (wa.sh fetches its WAV
this way).
- GET /v1/status exposes live GPU sample (nvidia-smi every 2s),
per-consumer health/loaded/gpu_resident_mib/active/total_requests,
scheduler stats.
- GET /healthz, GET / — broker liveness.
The Scheduler interface is in place but the implementation is
'Passthrough' — every job runs immediately, no lock, no queue. Schritt 4
replaces it with a serialising mutex; Schritt 5 adds VRAM-pressure
eviction. The interface boundary means server.go stays unchanged.
Out of scope here:
- Schritt 3: wa.sh migration (parallel work in mAi).
- Schritt 4: queue + global GPU lock.
- Schritt 5: nvidia-smi-driven LRU eviction.
Tests: config validation (good/bad), proxy forwards body, audio proxy
streams bytes, unhealthy consumer returns 503, /v1/status JSON shape.
Refs: m/mGPUmanager#1
104 lines
2.8 KiB
Go
104 lines
2.8 KiB
Go
// mgpumanager is the GPU-Inference-Control-Plane for mRock.
|
|
//
|
|
// One Go binary that:
|
|
// 1. Loads consumers.yaml.
|
|
// 2. Probes every consumer's /health on a 5s cadence.
|
|
// 3. Polls nvidia-smi every 2s for live VRAM usage (used by Schritt 5
|
|
// eviction).
|
|
// 4. Exposes /v1/{tts,stt,llm,image} as a thin proxy + /v1/status for
|
|
// observability.
|
|
// 5. Funnels every job through the Scheduler (passthrough today, queue +
|
|
// eviction in Schritt 4-5).
|
|
//
|
|
// All client routing happens through this daemon — no consumer is reached
|
|
// directly any more. wa.sh, ImaGen, m-CLI and Furbotto-Voice will all speak
|
|
// to :8770/v1/*.
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"flag"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"syscall"
|
|
"time"
|
|
|
|
"mgit.msbls.de/m/mGPUmanager/internal/config"
|
|
"mgit.msbls.de/m/mGPUmanager/internal/gpu"
|
|
"mgit.msbls.de/m/mGPUmanager/internal/registry"
|
|
"mgit.msbls.de/m/mGPUmanager/internal/scheduler"
|
|
"mgit.msbls.de/m/mGPUmanager/internal/server"
|
|
)
|
|
|
|
func main() {
|
|
configPath := flag.String("config", "config/consumers.yaml", "path to consumers.yaml")
|
|
listenOverride := flag.String("listen", "", "override listen address from config")
|
|
logLevel := flag.String("log-level", "info", "log level: debug|info|warn|error")
|
|
flag.Parse()
|
|
|
|
logger := newLogger(*logLevel)
|
|
|
|
cfg, err := config.Load(*configPath)
|
|
if err != nil {
|
|
logger.Error("config load failed", "err", err, "path", *configPath)
|
|
os.Exit(1)
|
|
}
|
|
if *listenOverride != "" {
|
|
cfg.Listen = *listenOverride
|
|
}
|
|
|
|
logger.Info("starting mGPUmanager",
|
|
"listen", cfg.Listen,
|
|
"consumers", len(cfg.Consumers),
|
|
"poll_interval", cfg.GPU.PollInterval(),
|
|
)
|
|
|
|
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
|
defer cancel()
|
|
|
|
reg := registry.New(cfg, logger.With("component", "registry"))
|
|
gpuPoller := gpu.NewPoller(cfg.GPU.PollInterval(), logger.With("component", "gpu"))
|
|
sched := scheduler.NewPassthrough(reg)
|
|
|
|
go reg.Run(ctx)
|
|
go gpuPoller.Run(ctx)
|
|
|
|
srv := server.New(cfg, reg, gpuPoller, sched, logger.With("component", "server"))
|
|
httpSrv := &http.Server{
|
|
Addr: cfg.Listen,
|
|
Handler: srv.Handler(),
|
|
ReadHeaderTimeout: 10 * time.Second,
|
|
}
|
|
|
|
go func() {
|
|
<-ctx.Done()
|
|
shutCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
_ = httpSrv.Shutdown(shutCtx)
|
|
}()
|
|
|
|
if err := httpSrv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
|
logger.Error("listen failed", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
logger.Info("shutdown complete")
|
|
}
|
|
|
|
// newLogger builds the JSON logger used by the whole process; it writes to
// stderr and drops records below the requested level.
//
// level is matched case-insensitively against debug, info, warn and error,
// using slog.Level.UnmarshalText (which additionally accepts numeric offsets
// such as "info+2"). Any value that cannot be parsed selects info, and a
// warning naming the rejected value is logged so the fallback is visible.
func newLogger(level string) *slog.Logger {
	lvl := slog.LevelInfo
	parseErr := lvl.UnmarshalText([]byte(level))
	if parseErr != nil {
		// Be explicit about the fallback rather than relying on what
		// UnmarshalText leaves behind on failure.
		lvl = slog.LevelInfo
	}

	logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: lvl}))
	if parseErr != nil {
		logger.Warn("unknown log level, falling back to info", "level", level, "err", parseErr)
	}
	return logger
}
|