---
# GPU manager configuration: listen address, VRAM budget, request routing,
# and the HTTP endpoints / VRAM footprint of each backend consumer.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks and indentation had been lost. Confirm the placement of
# `poll_interval_seconds` (under `gpu`) and of `audio_proxy` /
# `audio_path_prefix` (top level) against the loader's schema.

# Address and port the manager binds to.
listen: '127.0.0.1:8770'

gpu:
  total_mib: 16376  # RTX 4070 Ti SUPER
  reserved_mib: 1024  # headroom for system/desktop
  poll_interval_seconds: 2

# Request type -> name of the consumer (see `consumers`) that serves it.
routing:
  tts: mvoice
  stt: mvoice  # whisper-server is alternative if explicitly requested
  llm: ollama
  image: comfyui

# Audio download proxy: any GET under audio_path_prefix is forwarded to this
# consumer at the same path. wa.sh fetches mvoice's generated WAV this way.
audio_proxy: mvoice
audio_path_prefix: /api/audio/

consumers:
  mvoice:
    url: http://localhost:8766
    health:
      method: GET
      path: /api/health
    paths:
      tts:
        method: POST
        path: /api/synthesize
      stt:
        method: POST
        path: /api/transcribe
    vram_resident_mib: 2800
    load:
      method: POST
      path: /api/admin/load
    unload:
      method: POST
      path: /api/admin/unload
    can_coexist_with: [whisper-server, ollama]
    priority: 3
    max_concurrency: 1

  whisper-server:
    url: http://localhost:8178
    health:
      method: GET
      path: /
    paths:
      stt:
        method: POST
        path: /inference
    vram_resident_mib: 2050
    # No HTTP unload; mGPUmanager evicts via systemd restart (step 5).
    systemd_unit: whisper-server.service
    can_coexist_with: [mvoice, ollama]
    priority: 2
    max_concurrency: 1

  ollama:
    url: http://localhost:11434
    health:
      method: GET
      path: /api/tags
    paths:
      llm:
        method: POST
        path: /api/generate
    # Ollama runs its own LRU keep_alive; we don't track resident VRAM.
    vram_managed: true
    can_coexist_with: [mvoice, whisper-server]
    priority: 2
    max_concurrency: 1

  comfyui:
    url: http://localhost:8188
    health:
      method: GET
      path: /system_stats
    paths:
      image:
        method: POST
        path: /prompt
    vram_resident_mib: 13000
    unload:
      method: POST
      path: /api/free
      # JSON request body sent verbatim with the unload call.
      body: '{"unload_models":true,"free_memory":true}'
    # Empty list: no other consumer is listed as able to share the GPU.
    can_coexist_with: []
    priority: 1
    max_concurrency: 1