Broke a few things, added a few more
This commit is contained in:
27
ansible/roles/llama_cpp/defaults/main.yml
Normal file
27
ansible/roles/llama_cpp/defaults/main.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
---
# Defaults for the llama_cpp role: container image, install layout, and
# llama-server runtime arguments. Override per host/group as needed.

# Root directory on the target host for compose files and persistent data.
llama_cpp_install_dir: /opt/llama_cpp

# Persistent data (models, HF cache) lives under the install dir.
llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"

# Upstream llama.cpp server image (CUDA build).
llama_cpp_image: ghcr.io/ggml-org/llama.cpp

llama_cpp_image_tag: server-cuda

# Extra container labels; empty list by default.
llama_cpp_container_labels: []

# Environment passed to the container. LLAMA_CACHE must match a volume
# mount target or downloaded models will not persist across restarts.
llama_cpp_container_env:
  LLAMA_CACHE: "/hf_cache"

# TODO: Check how llama persists data
# Hugging Face model reference consumed by `-hf` (repo:quant form).
# Quoted: the value contains a colon and must stay a plain string.
# NOTE(review): verify this repo/tag exists upstream ("Qwen3.5" looks off).
llama_cpp_model: "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL"

# Arguments appended to the llama-server entrypoint. Everything is quoted
# so YAML keeps flags and numbers as strings for the command line
# (e.g. "0.00" would otherwise be retyped as a float).
llama_cpp_args:
  - "-hf"
  - "{{ llama_cpp_model }}"
  - "--port"
  - "8090"
  - "--host"
  - "0.0.0.0"
  - "--ctx-size"
  - "16384"
  - "--temp"
  - "0.6"
  - "--top-p"
  - "0.95"
  - "--top-k"
  - "20"
  - "--min-p"
  - "0.00"
|
||||
18
ansible/roles/llama_cpp/tasks/deploy/main.yml
Normal file
18
ansible/roles/llama_cpp/tasks/deploy/main.yml
Normal file
@@ -0,0 +1,18 @@
|
||||
---
# Deploy operation: prepare directories, pull the image, render the
# compose file. Container start/refresh is presumably handled elsewhere
# (no compose-up task visible here — TODO confirm).

- name: Create llama.cpp Directories
  ansible.builtin.file:
    state: directory
    path: "{{ llama_cpp_install_dir }}/{{ item }}"
    # Explicit mode: without it, permissions depend on the remote umask
    # and ansible-lint flags the task (risky-file-permissions).
    mode: "0755"
  loop:
    - data
    - compose

- name: Pull llama.cpp Image
  community.docker.docker_image:
    source: pull
    name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"

- name: Copy composefile
  ansible.builtin.template:
    # Role convention: src is resolved against the role's templates/
    # directory automatically, so no "templates/" prefix is needed.
    src: composefile.yml
    dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
    mode: "0644"
|
||||
5
ansible/roles/llama_cpp/tasks/main.yml
Normal file
5
ansible/roles/llama_cpp/tasks/main.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
---
# Entry point: dispatch to the selected operation's task file
# (e.g. deploy/main.yml). `deployment_action` defaults to "deploy".
- name: Include selected operations for llama.cpp
  tags:
    - llama_cpp
  ansible.builtin.include_tasks:
    file: "{{ deployment_action | default('deploy') }}/main.yml"
    # Dynamically included tasks do NOT inherit tags from the include
    # task itself; without `apply`, `--tags llama_cpp` would run the
    # include but skip every (untagged) task inside the included file.
    apply:
      tags:
        - llama_cpp
|
||||
35
ansible/roles/llama_cpp/templates/composefile.yml
Normal file
35
ansible/roles/llama_cpp/templates/composefile.yml
Normal file
@@ -0,0 +1,35 @@
|
||||
# Rendered by the llama_cpp role (ansible.builtin.template); do not edit
# on the host. Ansible's template module trims the newline after {% ... %}
# control lines (trim_blocks), so the if-blocks render cleanly.
services:

  llama_cpp:
    # Quoted: the rendered value contains a colon (image:tag).
    image: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"

{% if llama_cpp_args %}
    # indent(6, true) also indents the first line, so the rendered list
    # lands under `command:` regardless of this line's own indentation.
    command:
{{ llama_cpp_args | to_nice_yaml | indent(6, true) }}
{% endif %}

    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    network_mode: host
    volumes:
      - "{{ llama_cpp_data_dir }}/models:/models"
      # Role defaults set LLAMA_CACHE=/hf_cache, so the cache must be
      # mounted there — previously only /root/.cache was mounted, which
      # the server ignores when LLAMA_CACHE is set, losing the cache on
      # container replacement. /root/.cache is kept as a fallback for
      # configurations that unset LLAMA_CACHE.
      - "{{ llama_cpp_data_dir }}/hf_cache:/hf_cache"
      - "{{ llama_cpp_data_dir }}/hf_cache:/root/.cache"
    tty: true
    restart: unless-stopped

{% if llama_cpp_container_env %}
    environment:
{{ llama_cpp_container_env | to_nice_yaml | indent(6, true) }}
{% endif %}

{% if llama_cpp_container_labels %}
    labels:
{{ llama_cpp_container_labels | to_nice_yaml | indent(6, true) }}
{% endif %}

# llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
|
||||
Reference in New Issue
Block a user