Merge deploy-time fixes (systemd --user unit, initialLoaded heuristic)
This commit is contained in:
11
Makefile
11
Makefile
@@ -3,8 +3,8 @@
|
|||||||
# `make build` — compile the Go binary into ./bin/mgpumanager.
|
# `make build` — compile the Go binary into ./bin/mgpumanager.
|
||||||
# `make test` — go test ./...
|
# `make test` — go test ./...
|
||||||
# `make run` — run locally against ./config/consumers.yaml.
|
# `make run` — run locally against ./config/consumers.yaml.
|
||||||
# `make deploy` — rsync binary + config + systemd unit to mRock,
|
# `make deploy` — rsync binary + config + user-unit to mRock and
|
||||||
# reload systemd, restart the service.
|
# (re)start it under `systemctl --user`.
|
||||||
|
|
||||||
BIN := bin/mgpumanager
|
BIN := bin/mgpumanager
|
||||||
PKG := ./cmd/mgpumanager
|
PKG := ./cmd/mgpumanager
|
||||||
@@ -12,6 +12,7 @@ PKG := ./cmd/mgpumanager
|
|||||||
GO ?= go
|
GO ?= go
|
||||||
HOST ?= mrock
|
HOST ?= mrock
|
||||||
REMOTE_DIR ?= /home/m/dev/mGPUmanager
|
REMOTE_DIR ?= /home/m/dev/mGPUmanager
|
||||||
|
USER_UNIT_DIR ?= /home/m/.config/systemd/user
|
||||||
|
|
||||||
.PHONY: build test run deploy clean
|
.PHONY: build test run deploy clean
|
||||||
|
|
||||||
@@ -25,11 +26,13 @@ test:
|
|||||||
run: build
|
run: build
|
||||||
./$(BIN) --config config/consumers.yaml --log-level debug
|
./$(BIN) --config config/consumers.yaml --log-level debug
|
||||||
|
|
||||||
|
# Deploys to mRock as a user unit (systemd --user). User lingering must
|
||||||
|
# be enabled on the target host: `sudo loginctl enable-linger m`.
|
||||||
deploy: build
|
deploy: build
|
||||||
rsync -a --mkpath $(BIN) $(HOST):$(REMOTE_DIR)/$(BIN)
|
rsync -a --mkpath $(BIN) $(HOST):$(REMOTE_DIR)/$(BIN)
|
||||||
rsync -a --mkpath config/consumers.yaml $(HOST):$(REMOTE_DIR)/config/consumers.yaml
|
rsync -a --mkpath config/consumers.yaml $(HOST):$(REMOTE_DIR)/config/consumers.yaml
|
||||||
rsync -a --mkpath systemd/mgpumanager.service $(HOST):$(REMOTE_DIR)/systemd/mgpumanager.service
|
rsync -a --mkpath systemd/mgpumanager.service $(HOST):$(USER_UNIT_DIR)/mgpumanager.service
|
||||||
ssh $(HOST) "sudo cp $(REMOTE_DIR)/systemd/mgpumanager.service /etc/systemd/system/mgpumanager.service && sudo systemctl daemon-reload && sudo systemctl enable mgpumanager.service && sudo systemctl restart mgpumanager.service && sudo systemctl status mgpumanager.service --no-pager -l"
|
ssh $(HOST) "systemctl --user daemon-reload && systemctl --user enable mgpumanager.service && systemctl --user restart mgpumanager.service && systemctl --user status mgpumanager.service --no-pager -l"
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf bin
|
rm -rf bin
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
listen: 127.0.0.1:8770
|
listen: 0.0.0.0:8770
|
||||||
|
|
||||||
gpu:
|
gpu:
|
||||||
total_mib: 16376 # RTX 4070 Ti SUPER
|
total_mib: 16376 # RTX 4070 Ti SUPER
|
||||||
|
|||||||
@@ -70,13 +70,38 @@ func NewEvicting(cfg *config.Config, reg *registry.Registry, gpuPoller *gpu.Poll
|
|||||||
lastUsed: make(map[string]time.Time, len(cfg.Consumers)),
|
lastUsed: make(map[string]time.Time, len(cfg.Consumers)),
|
||||||
}
|
}
|
||||||
for name, cons := range cfg.Consumers {
|
for name, cons := range cfg.Consumers {
|
||||||
// Self-managed VRAM consumers (ollama) are always 'loaded' from
|
e.loaded[name] = initialLoaded(cons)
|
||||||
// the scheduler's perspective — we never evict them via HTTP.
|
|
||||||
e.loaded[name] = !cons.VRAMManaged || true
|
|
||||||
}
|
}
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initialLoaded picks the believed-loaded state for a consumer at scheduler
|
||||||
|
// startup. The rule:
|
||||||
|
//
|
||||||
|
// - VRAM-managed (ollama): true — we never track or evict it.
|
||||||
|
// - Has a load route AND an unload route (mvoice): true — the consumer
|
||||||
|
// is set up to be controllable in both directions, and typically
|
||||||
|
// preloads on its own systemd-managed startup.
|
||||||
|
// - Has only an unload route, no load route (comfyui): false — lazy.
|
||||||
|
// FLUX isn't resident until the first /prompt; until that happens we
|
||||||
|
// don't account for its VRAM cost.
|
||||||
|
// - Has a systemd_unit but no HTTP routes (whisper-server): true — these
|
||||||
|
// are always-on services that load their model at process start.
|
||||||
|
// - Neither: true — fallback, assume it's there if the consumer is up.
|
||||||
|
//
|
||||||
|
// Getting this right matters for the eviction smoke test: if comfyui were
|
||||||
|
// believed loaded at startup, ensureFits would short-circuit on the first
|
||||||
|
// /v1/image request and never trigger eviction. (m/mGPUmanager#1 live deploy.)
|
||||||
|
func initialLoaded(cons *config.Consumer) bool {
|
||||||
|
if cons.VRAMManaged {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if cons.Load == nil && cons.Unload != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Run is the public Scheduler interface: ensure room + load + serialise.
|
// Run is the public Scheduler interface: ensure room + load + serialise.
|
||||||
func (e *Evicting) Run(ctx context.Context, consumer string, fn Job) error {
|
func (e *Evicting) Run(ctx context.Context, consumer string, fn Job) error {
|
||||||
if err := e.ensureFits(ctx, consumer); err != nil {
|
if err := e.ensureFits(ctx, consumer); err != nil {
|
||||||
|
|||||||
@@ -118,6 +118,37 @@ func buildCfg(mvoiceURL, comfyURL string) *config.Config {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestInitialLoadedHeuristic pins the comfyui-isn't-preloaded rule down:
|
||||||
|
// a consumer with Unload but no Load is lazy; everything else is assumed
|
||||||
|
// resident at startup.
|
||||||
|
func TestInitialLoadedHeuristic(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
cons *config.Consumer
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"vram_managed (ollama)", &config.Consumer{VRAMManaged: true}, true},
|
||||||
|
{"load+unload (mvoice)", &config.Consumer{
|
||||||
|
Load: &config.Route{Path: "/load"},
|
||||||
|
Unload: &config.Route{Path: "/unload"},
|
||||||
|
}, true},
|
||||||
|
{"unload only — lazy (comfyui)", &config.Consumer{
|
||||||
|
Unload: &config.Route{Path: "/api/free"},
|
||||||
|
}, false},
|
||||||
|
{"systemd unit only (whisper-server)", &config.Consumer{
|
||||||
|
SystemdUnit: "whisper-server.service",
|
||||||
|
}, true},
|
||||||
|
{"empty consumer", &config.Consumer{}, true},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
if got := initialLoaded(c.cons); got != c.want {
|
||||||
|
t.Errorf("initialLoaded = %v, want %v", got, c.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestEvictingSkipsWhenAlreadyResident verifies the no-op fast path: a job
|
// TestEvictingSkipsWhenAlreadyResident verifies the no-op fast path: a job
|
||||||
// for an already-loaded consumer with plenty of free VRAM runs without any
|
// for an already-loaded consumer with plenty of free VRAM runs without any
|
||||||
// unload call.
|
// unload call.
|
||||||
|
|||||||
@@ -1,30 +1,15 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=mGPUmanager — GPU-Inference-Control-Plane for mRock
|
Description=mGPUmanager — GPU-Inference-Control-Plane
|
||||||
Documentation=https://mgit.msbls.de/m/mGPUmanager
|
Documentation=https://mgit.msbls.de/m/mGPUmanager
|
||||||
After=network-online.target
|
After=network.target
|
||||||
Wants=network-online.target
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
User=m
|
WorkingDirectory=%h/dev/mGPUmanager
|
||||||
Group=m
|
ExecStart=%h/dev/mGPUmanager/bin/mgpumanager --config %h/dev/mGPUmanager/config/consumers.yaml --log-level info
|
||||||
WorkingDirectory=/home/m/dev/mGPUmanager
|
|
||||||
ExecStart=/home/m/dev/mGPUmanager/bin/mgpumanager \
|
|
||||||
--config /home/m/dev/mGPUmanager/config/consumers.yaml \
|
|
||||||
--log-level info
|
|
||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
RestartSec=3
|
RestartSec=5
|
||||||
TimeoutStopSec=10
|
TimeoutStopSec=10
|
||||||
|
|
||||||
# Hardening — broker has no need for elevated capabilities.
|
|
||||||
NoNewPrivileges=true
|
|
||||||
PrivateTmp=true
|
|
||||||
ProtectSystem=strict
|
|
||||||
ProtectHome=read-only
|
|
||||||
ReadWritePaths=/home/m/dev/mGPUmanager
|
|
||||||
|
|
||||||
# The broker only proxies; nvidia-smi is the only GPU-touching call.
|
|
||||||
PrivateDevices=false
|
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=default.target
|
||||||
|
|||||||
Reference in New Issue
Block a user