# Docker Compose definition for a single vLLM service (Jinja2 template).
# Jinja block tags are kept at column 0 so they contribute no stray leading
# whitespace to the rendered YAML when lstrip_blocks is disabled.
services:
  vllm:
    # Quoted so an empty or special-character expansion still yields a string.
    image: "{{ vllm_image }}:{{ vllm_image_tag }}"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    network_mode: host
    ipc: host
    command:
      # The model is the first list item; any extra arguments are appended as
      # further items at the same depth (6 spaces). The indent filter leaves
      # the first rendered line alone, so the literal 6-space prefix below and
      # indent(6) must stay in sync.
      # Assumes vllm_args is a list - TODO confirm against the role defaults.
      - "{{ vllm_model }}"
{% if vllm_args %}
      {{ vllm_args | to_nice_yaml | trim | indent(6) }}
{% endif %}
    volumes:
      - "{{ vllm_model_dir }}:/root/.cache/huggingface"
    tty: true
    restart: unless-stopped
{% if vllm_container_env %}
    environment:
      {{ vllm_container_env | to_nice_yaml | trim | indent(6) }}
{% endif %}
{% if vllm_container_labels %}
    # Rendered one level deeper than the key so that both mapping-style and
    # list-style labels nest under "labels" instead of becoming its siblings.
    labels:
      {{ vllm_container_labels | to_nice_yaml | trim | indent(6) }}
{% endif %}

# vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL