Broke a few things, added a few more

2026-03-26 18:15:01 +01:00
parent f3ce5a8880
commit 9b11b5ece9
14 changed files with 189 additions and 2 deletions
@@ -28,3 +28,7 @@ ansible-playbook -i inventory/yourinventory deploy-ai-at-home.yml

 This is currently a sloppy work in progress and might or might not be developed in the future.

+Considerations:
+vLLM and llama.cpp can only provide one image per invocation, so their structure differs from Ollama's.
+Multiple composefiles are needed for the different invocations and configurations, instead of a single one.
+This requires changes in the roles and so on.
@@ -0,0 +1,6 @@
+---
+# Playbook: applies the llama_cpp role to every host of the llama_cpp
+# inventory group.
+- name: Install llama.cpp
+  hosts: llama_cpp
+  roles:
+  - role: llama_cpp
@@ -0,0 +1,6 @@
+---
+# Playbook: applies the vllm role to every host of the vllm inventory group.
+- name: Install vLLM
+  hosts: vllm
+  roles:
+  - role: vllm
@@ -4,5 +4,8 @@ openwebui_install_dir: "{{ ai_at_home_install_dir }}/openwebui"

 common_container_labels:
 - sablier.enable=true
-ollama_container_labels: {{ common_container_labels + ["sablier.group=ai_backends"] }}
-openwebui_container_labels: {{ common_container_labels + ["sablier.group=ai_frontends"] }}
+# Environment variables handed to the Ollama container; the role's composefile
+# template renders this mapping into the service's environment section.
+# Values are quoted where YAML would otherwise type them as numbers, so they
+# reach the container as plain strings.
+ollama_container_env:
+  OLLAMA_KV_CACHE_TYPE: q8_0
+  OLLAMA_FLASH_ATTENTION: "1"
+# Per-service labels: the shared labels plus the group label of each service.
+ollama_container_labels: "{{ common_container_labels + ['sablier.group=ai_backends'] }}"
+openwebui_container_labels: "{{ common_container_labels + ['sablier.group=ai_frontends'] }}"
@@ -0,0 +1,27 @@
+---
+# Default variables for the llama_cpp role.
+
+# Host-side locations: the install dir holds the compose file, the data dir is
+# bind-mounted into the container.
+llama_cpp_install_dir: /opt/llama_cpp
+llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"
+
+# Container image to pull and run.
+llama_cpp_image: ghcr.io/ggml-org/llama.cpp
+llama_cpp_image_tag: server-cuda
+
+# Extra labels for the service (empty list: no labels section is rendered).
+llama_cpp_container_labels: []
+
+# Environment for the service. The composefile template bind-mounts
+# <llama_cpp_data_dir>/hf_cache at /root/.cache inside the container, so the
+# cache directory has to live below that mount point; a path outside of it
+# would be lost whenever the container is re-created.
+# TODO: Check how llama persists data
+llama_cpp_container_env:
+  LLAMA_CACHE: "/root/.cache/llama.cpp"
+
+# Model reference passed to the server through the -hf argument below.
+llama_cpp_model: unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL
+
+# Command-line arguments, one list item per token. Numeric values are quoted
+# so they are rendered as strings in the compose command list.
+llama_cpp_args:
+- -hf
+- "{{ llama_cpp_model }}"
+- --port
+- "8090"
+- --host
+- "0.0.0.0"
+- --ctx-size
+- "16384"
+- --temp
+- "0.6"
+- --top-p
+- "0.95"
+- --top-k
+- "20"
+- --min-p
+- "0.00"
@@ -0,0 +1,18 @@
+---
+# Deploy tasks for llama.cpp: create the directory layout, pull the image and
+# render the compose file into the install directory.
+
+- name: Create llama.cpp Directories
+  ansible.builtin.file:
+    path: "{{ llama_cpp_install_dir }}/{{ item }}"
+    state: directory
+  loop: [data, compose]
+
+- name: Pull llama.cpp Image
+  community.docker.docker_image:
+    name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"
+    source: pull
+
+- name: Copy composefile
+  ansible.builtin.template:
+    dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
+    src: templates/composefile.yml
@@ -0,0 +1,5 @@
+---
+# Role entry point: dispatches to <deployment_action>/main.yml, falling back
+# to the "deploy" task set when deployment_action is not defined.
+#
+# Tags set on a dynamic include only select the include statement itself; the
+# tasks it loads do not inherit them. "apply" pushes the same tag down to the
+# included tasks so that running with --tags llama_cpp actually executes them.
+- name: Include selected operations for llama.cpp
+  tags:
+  - llama_cpp
+  ansible.builtin.include_tasks:
+    file: "{{ deployment_action | default('deploy') }}/main.yml"
+    apply:
+      tags:
+      - llama_cpp
@@ -0,0 +1,35 @@
+# Compose service definition for the llama.cpp container (Jinja2 template).
+#
+# Template conventions used below:
+# - Jinja block tags (if/endif) start at column 0. The template task does not
+#   enable lstrip_blocks, so any whitespace in front of a block tag would be
+#   emitted and would shift the indentation of the following line.
+# - Multi-line values are placed at their final column and the indent filter
+#   receives that same column, because the filter leaves the first line alone.
+services:
+
+  llama_cpp:
+    image: {{ llama_cpp_image }}:{{ llama_cpp_image_tag }}
+
+{% if llama_cpp_args %}
+    command:
+    {{ llama_cpp_args|to_nice_yaml|indent(4) }}
+{% endif %}
+
+    deploy:
+      resources:
+        reservations:
+          devices:
+          - driver: nvidia
+            count: all
+            capabilities: [gpu]
+    network_mode: host
+    volumes:
+    - {{ llama_cpp_data_dir }}/models:/models
+    - {{ llama_cpp_data_dir }}/hf_cache:/root/.cache
+    tty: true
+    restart: unless-stopped
+
+{% if llama_cpp_container_env %}
+    environment:
+      {{ llama_cpp_container_env|to_nice_yaml|indent(6) }}
+{% endif %}
+
+{% if llama_cpp_container_labels %}
+    labels:
+    {{ llama_cpp_container_labels|to_nice_yaml|indent(4) }}
+{% endif %}
+
+# llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
@@ -1,5 +1,7 @@
 ---
 ollama_install_dir: /opt/ollama
+# Host directory for Ollama data. Derived from ollama_install_dir so it follows
+# an overridden install dir; the composefile template mounts the same
+# "<install dir>/data" path at /root/.ollama.
+# TODO: confirm whether anything consumes this variable yet.
+ollama_model_cache: "{{ ollama_install_dir }}/data"
 ollama_image: ollama/ollama
 ollama_image_tag: latest
 ollama_container_labels: []
+# Extra environment for the container (empty mapping: no environment section
+# is rendered by the template).
+ollama_container_env: {}
@@ -14,7 +14,13 @@ services:
    - {{ ollama_install_dir }}/data:/root/.ollama
    tty: true
    restart: unless-stopped
+    # Environment mapping: the value starts at column 6 and continuation lines
+    # are indented by 6 as well, so every key lines up under "environment".
+    # The block tags start at column 0 so they add no stray indentation.
+{% if ollama_container_env %}
+    environment:
+      {{ ollama_container_env|to_nice_yaml|indent(6) }}
+{% endif %}
    {% if ollama_container_labels %}
    labels:
    {{ ollama_container_labels|to_nice_yaml|indent(4) }}
    {% endif %}
+
+#ollama pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
@@ -0,0 +1,19 @@
+---
+# Default variables for the vllm role.
+
+# Host-side locations: the install dir holds the compose file, the model dir
+# is bind-mounted into the container by the composefile template.
+vllm_install_dir: /opt/vllm
+vllm_model_dir: "{{ vllm_install_dir }}/data"
+
+# Container image to pull and run.
+vllm_image: vllm/vllm-openai
+vllm_image_tag: latest
+
+# Extra labels / environment for the service (empty: section is not rendered).
+vllm_container_labels: []
+vllm_container_env: {}
+
+# Model notes kept for reference:
+# https://www.jan.ai/docs/desktop/jan-models/jan-code-4b
+# vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
+# DavidAU/Qwen3.5-40B-Claude-4.6-Opus-Deckard-Heretic-Uncensored-Thinking
+# DavidAU/Qwen3.5-40B-RoughHouse-Claude-4.6-Opus-Polar-Deckard-Uncensored-Heretic-Thinking
+
+# Model passed as the first item of the container command.
+vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
+
+# Additional command-line arguments, one list item per token; they are
+# appended after the model in the rendered command list.
+vllm_args:
+- --enable-prefix-caching
+- --enable-auto-tool-choice
+- --reasoning-parser
+- qwen3
+- --tool-call-parser
+- qwen3_coder
@@ -0,0 +1,18 @@
+---
+# Deploy tasks for vLLM: create the directory layout, pull the image and
+# render the compose file into the install directory.
+
+- name: Create vllm Directories
+  ansible.builtin.file:
+    path: "{{ vllm_install_dir }}/{{ item }}"
+    state: directory
+  loop: [data, compose]
+
+- name: Pull vllm Image
+  community.docker.docker_image:
+    name: "{{ vllm_image }}:{{ vllm_image_tag }}"
+    source: pull
+
+- name: Copy composefile
+  ansible.builtin.template:
+    dest: "{{ vllm_install_dir }}/compose/composefile.yml"
+    src: templates/composefile.yml
@@ -0,0 +1,5 @@
+---
+# Role entry point: dispatches to <deployment_action>/main.yml, falling back
+# to the "deploy" task set when deployment_action is not defined.
+#
+# Tags set on a dynamic include only select the include statement itself; the
+# tasks it loads do not inherit them. "apply" pushes the same tag down to the
+# included tasks so that running with --tags vllm actually executes them.
+- name: Include selected operations for vLLM
+  tags:
+  - vllm
+  ansible.builtin.include_tasks:
+    file: "{{ deployment_action | default('deploy') }}/main.yml"
+    apply:
+      tags:
+      - vllm
@@ -0,0 +1,33 @@
+# Compose service definition for the vLLM container (Jinja2 template).
+#
+# Template conventions used below:
+# - Jinja block tags (if/endif) start at column 0. The template task does not
+#   enable lstrip_blocks, so any whitespace in front of a block tag would be
+#   emitted and would shift the indentation of the following line.
+# - Multi-line values are placed at their final column and the indent filter
+#   receives that same column, because the filter leaves the first line alone.
+services:
+
+  vllm:
+    image: {{ vllm_image }}:{{ vllm_image_tag }}
+    deploy:
+      resources:
+        reservations:
+          devices:
+          - driver: nvidia
+            count: all
+            capabilities: [gpu]
+    network_mode: host
+    ipc: host
+    # The model comes first; the optional extra arguments continue the same
+    # list at the same indentation.
+    command:
+    - {{ vllm_model }}
+{% if vllm_args %}
+    {{ vllm_args|to_nice_yaml|indent(4) }}
+{% endif %}
+    volumes:
+    - {{ vllm_model_dir }}:/root/.cache/huggingface
+    tty: true
+    restart: unless-stopped
+{% if vllm_container_env %}
+    environment:
+      {{ vllm_container_env|to_nice_yaml|indent(6) }}
+{% endif %}
+{% if vllm_container_labels %}
+    labels:
+    {{ vllm_container_labels|to_nice_yaml|indent(4) }}
+{% endif %}
+
+# vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL