Files
mGPUmanager/internal/scheduler/locked_test.go
mAi 3b3d828e9e feat: Schritt 4 — Locked scheduler (global GPU lock, queue, stats)
Replaces the MVP Passthrough with scheduler.Locked: a capacity-1 channel
serialises every consumer's GPU work end-to-end. main.go switches to it.

Behavioural contract:
- Jobs that arrive while another job holds the GPU block on the channel
  until the holder finishes. Context cancellation aborts the wait
  cleanly (no leaked tokens, queue depth decremented).
- Stats track queue_depth, in_flight, total_jobs, last_wait_ms,
  last_run_ms, oldest_queued — surfaced through /v1/status.
- One lock for ALL consumers (not per-consumer): the design (§4.3) is
  explicit that coarse-grained locking beats GPU-stream-granular locking
  on single-GPU single-user hardware. mvoice + ollama + comfyui never run
  truly concurrently any more, which is the whole point — concurrent runs
  are what produced the CUDA-OOM under load.

Tests:
- 5 goroutines hammer the scheduler concurrently → max in-flight = 1.
- Cancellation while parked on the lock returns ctx.Err() and frees
  the queue slot.
- Stats reflect in-flight + queue-depth transitions correctly.
- Race detector clean.

Schritt 5 will compose this with VRAM-pressure eviction: before
acquiring the lock, check if the target consumer's resident cost fits
under the current GPU headroom; if not, unload the LRU non-coexistent
consumer first.

Refs: m/mGPUmanager#1 (Schritt 4).
2026-05-11 13:33:39 +02:00

165 lines
3.8 KiB
Go

package scheduler
import (
"context"
"errors"
"log/slog"
"io"
"sync"
"sync/atomic"
"testing"
"time"
"mgit.msbls.de/m/mGPUmanager/internal/config"
"mgit.msbls.de/m/mGPUmanager/internal/registry"
)
// newReg returns a registry populated with a single "mvoice" consumer for
// use by the scheduler tests. The registry is handed a logger that writes
// to io.Discard so test output stays quiet.
func newReg() *registry.Registry {
	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))

	mvoice := &config.Consumer{
		URL:    "http://localhost:8766",
		Health: config.Route{Method: "GET", Path: "/api/health"},
	}
	cfg := &config.Config{
		Consumers: map[string]*config.Consumer{"mvoice": mvoice},
	}

	return registry.New(cfg, quiet)
}
// TestLockedSerialisesConcurrentJobs guards against the CUDA-OOM caused by
// parallel loaders: jobs submitted at the same moment must execute one after
// another. Five goroutines call Run at once; each job records how many jobs
// are inside the job body simultaneously, and the observed peak must be 1.
func TestLockedSerialisesConcurrentJobs(t *testing.T) {
	const jobs = 5

	var (
		running atomic.Int32 // jobs currently inside the job body
		peak    atomic.Int32 // highest value of running observed so far
	)
	work := func(context.Context) error {
		mine := running.Add(1)
		defer running.Add(-1)
		// Raise peak to mine unless a value at least as large is already
		// recorded; reload and retry whenever the CAS loses a race.
		for seen := peak.Load(); mine > seen; seen = peak.Load() {
			if peak.CompareAndSwap(seen, mine) {
				break
			}
		}
		// Stay "on the GPU" long enough for the other callers to pile up.
		time.Sleep(20 * time.Millisecond)
		return nil
	}

	sched := NewLocked(newReg(), 1)

	var wg sync.WaitGroup
	for range jobs {
		wg.Go(func() {
			err := sched.Run(context.Background(), "mvoice", work)
			if err != nil {
				t.Errorf("Run: %v", err)
			}
		})
	}
	wg.Wait()

	if got := peak.Load(); got != 1 {
		t.Fatalf("max concurrent jobs = %d, want 1", got)
	}
	if got := sched.Stats().TotalJobs; got != jobs {
		t.Errorf("Stats.TotalJobs = %d, want %d", got, jobs)
	}
}
// TestLockedRespectsContextCancel verifies that a Run call queued behind a
// busy scheduler returns context.Canceled once its context is cancelled,
// never executes its job, and hands its queue slot back.
func TestLockedRespectsContextCancel(t *testing.T) {
	sched := NewLocked(newReg(), 1)

	// Hold the lock with a job that blocks until the test releases it.
	started := make(chan struct{})
	done := make(chan struct{})
	holderExited := make(chan struct{})
	go func() {
		defer close(holderExited)
		_ = sched.Run(context.Background(), "mvoice", func(ctx context.Context) error {
			close(started)
			<-done
			return nil
		})
	}()
	// Release and reap the holder on every exit path (including t.Fatal)
	// so a failing assertion does not leak the goroutine.
	t.Cleanup(func() {
		close(done)
		select {
		case <-holderExited:
		case <-time.After(time.Second):
			t.Error("holder Run did not return within 1s of being released")
		}
	})
	<-started

	// Now try to run with a context that we'll cancel.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Record execution in a flag rather than calling t.Error from the job:
	// the job runs on another goroutine that may outlive the test.
	var ran atomic.Bool
	errCh := make(chan error, 1)
	go func() {
		errCh <- sched.Run(ctx, "mvoice", func(ctx context.Context) error {
			ran.Store(true)
			return nil
		})
	}()

	// Wait until the second call is counted as queued before cancelling.
	// A fixed sleep could cancel before Run is even entered, in which case
	// the parked-waiter path would never be exercised.
	queued := false
	for deadline := time.Now().Add(time.Second); time.Now().Before(deadline); time.Sleep(2 * time.Millisecond) {
		if sched.Stats().QueueDepth == 1 {
			queued = true
			break
		}
	}
	if !queued {
		t.Fatalf("QueueDepth = %d, want 1 before cancelling", sched.Stats().QueueDepth)
	}
	cancel()

	select {
	case err := <-errCh:
		if !errors.Is(err, context.Canceled) {
			t.Fatalf("got err=%v, want context.Canceled", err)
		}
	case <-time.After(time.Second):
		t.Fatal("cancelled Run did not return within 1s")
	}

	if ran.Load() {
		t.Error("second job should not run after cancellation")
	}
	// The cancelled waiter must no longer be counted as queued.
	if got := sched.Stats().QueueDepth; got != 0 {
		t.Errorf("QueueDepth after cancelled wait = %d, want 0", got)
	}
}
// TestLockedStatsTrackInFlightAndQueue checks the Stats snapshot at three
// points: while one job holds the scheduler (InFlight 1, QueueDepth 0),
// while a second call is waiting behind it (QueueDepth 1), and after both
// calls have returned (InFlight 0).
func TestLockedStatsTrackInFlightAndQueue(t *testing.T) {
	sched := NewLocked(newReg(), 1)

	jobStart := make(chan struct{})
	jobBlock := make(chan struct{})
	holderDone := make(chan struct{})
	go func() {
		defer close(holderDone)
		_ = sched.Run(context.Background(), "mvoice", func(ctx context.Context) error {
			close(jobStart)
			<-jobBlock
			return nil
		})
	}()
	<-jobStart

	// Inside the holding job: InFlight==1, QueueDepth==0.
	s := sched.Stats()
	if s.InFlight != 1 {
		t.Errorf("InFlight while holding = %d, want 1", s.InFlight)
	}
	if s.QueueDepth != 0 {
		t.Errorf("QueueDepth = %d, want 0", s.QueueDepth)
	}

	// Queue a waiter and verify QueueDepth grows.
	waitDone := make(chan struct{})
	go func() {
		defer close(waitDone)
		_ = sched.Run(context.Background(), "mvoice", func(ctx context.Context) error {
			return nil
		})
	}()

	// Poll until the waiter is reported as queued. The goroutine having
	// started says nothing about whether Run has registered it yet.
	for deadline := time.Now().Add(time.Second); time.Now().Before(deadline); time.Sleep(2 * time.Millisecond) {
		if sched.Stats().QueueDepth == 1 {
			break
		}
	}
	if got := sched.Stats().QueueDepth; got != 1 {
		t.Errorf("QueueDepth with one waiter = %d, want 1", got)
	}

	close(jobBlock)

	// Wait for BOTH Run calls to return before reading the final stats.
	// The waiter finishing does not by itself prove the holder's Run has
	// returned. The wait is bounded so a stuck scheduler fails fast.
	timeout := time.After(5 * time.Second)
	for _, finished := range []chan struct{}{holderDone, waitDone} {
		select {
		case <-finished:
		case <-timeout:
			t.Fatal("Run calls did not return within 5s of releasing the holder")
		}
	}

	if got := sched.Stats().InFlight; got != 0 {
		t.Errorf("InFlight after both done = %d, want 0", got)
	}
}