{#
  Docker Compose definition for the llama_cpp service (Jinja2 template).

  Template notes:
  - Control tags sit at column 0 so the rendered indentation is the same
    whether or not the renderer enables lstrip_blocks.
  - Each templated collection is emitted one level deeper than its parent key
    (6 spaces) and the indent() width matches that literal prefix, so both
    lists and mappings render as valid children of the key.
  - trim drops the trailing newline that to_nice_yaml appends, which would
    otherwise leave a stray blank line after each section.
#}
services:

  llama_cpp:
    # Quoted so an empty or unusual tag cannot change how this line parses.
    image: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"

{% if llama_cpp_args %}
    # Optional command/arguments for the container; omitted when unset.
    command:
      {{ llama_cpp_args | to_nice_yaml | trim | indent(6) }}
{% endif %}

    # Reserve every available NVIDIA GPU for the container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    network_mode: host
    # Host data directory provides the model files and the cache directory.
    volumes:
      - "{{ llama_cpp_data_dir }}/models:/models"
      - "{{ llama_cpp_data_dir }}/hf_cache:/root/.cache"
    tty: true
    restart: unless-stopped

{% if llama_cpp_container_env %}
    # Optional environment for the container; omitted when unset.
    environment:
      {{ llama_cpp_container_env | to_nice_yaml | trim | indent(6) }}
{% endif %}

{% if llama_cpp_container_labels %}
    # Optional container labels; omitted when unset.
    labels:
      {{ llama_cpp_container_labels | to_nice_yaml | trim | indent(6) }}
{% endif %}

# NOTE(review): leftover note, not used by Compose; confirm the intended command or remove:
#   llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL