From 9b11b5ece9dd771908297790669e0d71cb3a2e99 Mon Sep 17 00:00:00 2001
From: Joe
Date: Thu, 26 Mar 2026 18:15:01 +0100
Subject: [PATCH] Broke a few things, added a few more

Add llama.cpp and vLLM roles plus per-backend playbooks, mirroring the
existing ollama role layout (defaults -> tasks/deploy -> compose template).
Also thread container env vars through the ollama compose template and
fix the unquoted Jinja label expressions in the example host_vars.

---
 README.md                                     |  4 +++
 ansible/deploy-llama_cpp.yml                  |  6 ++++
 ansible/deploy-vllm.yml                       |  6 ++++
 .../inventory/example/host_vars/yourpc.yml    |  7 ++--
 ansible/roles/llama_cpp/defaults/main.yml     | 27 ++++++++++++++
 ansible/roles/llama_cpp/tasks/deploy/main.yml | 18 ++++++++++
 ansible/roles/llama_cpp/tasks/main.yml        |  5 +++
 .../roles/llama_cpp/templates/composefile.yml | 35 +++++++++++++++++++
 ansible/roles/ollama/defaults/main.yml        |  2 ++
 .../roles/ollama/templates/composefile.yml    |  6 ++++
 ansible/roles/vllm/defaults/main.yml          | 19 ++++++++++
 ansible/roles/vllm/tasks/deploy/main.yml      | 18 ++++++++++
 ansible/roles/vllm/tasks/main.yml             |  5 +++
 ansible/roles/vllm/templates/composefile.yml  | 33 +++++++++++++++++
 14 files changed, 189 insertions(+), 2 deletions(-)
 create mode 100644 ansible/deploy-llama_cpp.yml
 create mode 100644 ansible/deploy-vllm.yml
 create mode 100644 ansible/roles/llama_cpp/defaults/main.yml
 create mode 100644 ansible/roles/llama_cpp/tasks/deploy/main.yml
 create mode 100644 ansible/roles/llama_cpp/tasks/main.yml
 create mode 100644 ansible/roles/llama_cpp/templates/composefile.yml
 create mode 100644 ansible/roles/vllm/defaults/main.yml
 create mode 100644 ansible/roles/vllm/tasks/deploy/main.yml
 create mode 100644 ansible/roles/vllm/tasks/main.yml
 create mode 100644 ansible/roles/vllm/templates/composefile.yml

diff --git a/README.md b/README.md
index 8b40d3c..46df724 100644
--- a/README.md
+++ b/README.md
@@ -28,3 +28,7 @@ ansible-playbook -i inventory/yourinventory deploy-ai-at-home.yml
 
 
 This is currently a sloppy work in progress and might or not be developed in future.
+Considerations:
+vllm and llama can only provide one image per invocation, so the structure is different from ollama
+Need multiple composefiles for different invocations and configurations instead of a single one
+Requires changes in roles and so on
diff --git a/ansible/deploy-llama_cpp.yml b/ansible/deploy-llama_cpp.yml
new file mode 100644
index 0000000..d8997ed
--- /dev/null
+++ b/ansible/deploy-llama_cpp.yml
@@ -0,0 +1,6 @@
+---
+- name: Install llama.cpp
+  hosts: llama_cpp
+
+  roles:
+    - llama_cpp
diff --git a/ansible/deploy-vllm.yml b/ansible/deploy-vllm.yml
new file mode 100644
index 0000000..7569591
--- /dev/null
+++ b/ansible/deploy-vllm.yml
@@ -0,0 +1,6 @@
+---
+- name: Install vLLM
+  hosts: vllm
+
+  roles:
+    - vllm
diff --git a/ansible/inventory/example/host_vars/yourpc.yml b/ansible/inventory/example/host_vars/yourpc.yml
index 4afa0ef..c7cfc9b 100644
--- a/ansible/inventory/example/host_vars/yourpc.yml
+++ b/ansible/inventory/example/host_vars/yourpc.yml
@@ -4,5 +4,8 @@ openwebui_install_dir: "{{ ai_at_home_install_dir }}/openwebui"
 
 common_container_labels:
   - sablier.enable=true
-ollama_container_labels: {{ common_container_labels + ["sablier.group=ai_backends"] }}
-openwebui_container_labels: {{ common_container_labels + ["sablier.group=ai_frontends"] }}
+ollama_container_env:
+  OLLAMA_KV_CACHE_TYPE: q8_0
+  OLLAMA_FLASH_ATTENTION: "1"
+ollama_container_labels: "{{ common_container_labels + ['sablier.group=ai_backends'] }}"
+openwebui_container_labels: "{{ common_container_labels + ['sablier.group=ai_frontends'] }}"
diff --git a/ansible/roles/llama_cpp/defaults/main.yml b/ansible/roles/llama_cpp/defaults/main.yml
new file mode 100644
index 0000000..06263c0
--- /dev/null
+++ b/ansible/roles/llama_cpp/defaults/main.yml
@@ -0,0 +1,27 @@
+---
+llama_cpp_install_dir: /opt/llama_cpp
+llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"
+llama_cpp_image: ghcr.io/ggml-org/llama.cpp
+llama_cpp_image_tag: server-cuda
+llama_cpp_container_labels: []
+llama_cpp_container_env:
+  LLAMA_CACHE: "/hf_cache"
+# TODO: Check how llama persists data
+llama_cpp_model: unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL
+llama_cpp_args:
+- -hf
+- "{{ llama_cpp_model }}"
+- --port
+- "8090"
+- --host
+- "0.0.0.0"
+- --ctx-size
+- "16384"
+- --temp
+- "0.6"
+- --top-p
+- "0.95"
+- --top-k
+- "20"
+- --min-p
+- "0.00"
diff --git a/ansible/roles/llama_cpp/tasks/deploy/main.yml b/ansible/roles/llama_cpp/tasks/deploy/main.yml
new file mode 100644
index 0000000..4ec3c3e
--- /dev/null
+++ b/ansible/roles/llama_cpp/tasks/deploy/main.yml
@@ -0,0 +1,18 @@
+---
+- name: Create llama.cpp Directories
+  ansible.builtin.file:
+    state: directory
+    path: "{{ llama_cpp_install_dir }}/{{ item }}"
+  loop:
+    - data
+    - compose
+
+- name: Pull llama.cpp Image
+  community.docker.docker_image:
+    source: pull
+    name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"
+
+- name: Copy composefile
+  ansible.builtin.template:
+    src: templates/composefile.yml
+    dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
diff --git a/ansible/roles/llama_cpp/tasks/main.yml b/ansible/roles/llama_cpp/tasks/main.yml
new file mode 100644
index 0000000..0f17a3f
--- /dev/null
+++ b/ansible/roles/llama_cpp/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+- name: Include selected operations for llama.cpp
+  tags:
+    - llama_cpp
+  ansible.builtin.include_tasks: "{{ deployment_action|default('deploy') }}/main.yml"
diff --git a/ansible/roles/llama_cpp/templates/composefile.yml b/ansible/roles/llama_cpp/templates/composefile.yml
new file mode 100644
index 0000000..0f34339
--- /dev/null
+++ b/ansible/roles/llama_cpp/templates/composefile.yml
@@ -0,0 +1,35 @@
+services:
+
+  llama_cpp:
+    image: {{ llama_cpp_image }}:{{ llama_cpp_image_tag }}
+
+{% if llama_cpp_args %}
+    command:
+      {{ llama_cpp_args | to_nice_yaml | indent(6) }}
+{% endif %}
+
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    network_mode: host
+    volumes:
+      - {{ llama_cpp_data_dir }}/models:/models
+      - {{ llama_cpp_data_dir }}/hf_cache:/hf_cache
+    tty: true
+    restart: unless-stopped
+
+{% if llama_cpp_container_env %}
+    environment:
+      {{ llama_cpp_container_env | to_nice_yaml | indent(6) }}
+{% endif %}
+
+{% if llama_cpp_container_labels %}
+    labels:
+      {{ llama_cpp_container_labels | to_nice_yaml | indent(6) }}
+{% endif %}
+
+#llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
diff --git a/ansible/roles/ollama/defaults/main.yml b/ansible/roles/ollama/defaults/main.yml
index d8f16c2..4a33e10 100644
--- a/ansible/roles/ollama/defaults/main.yml
+++ b/ansible/roles/ollama/defaults/main.yml
@@ -1,5 +1,7 @@
 ---
 ollama_install_dir: /opt/ollama
+ollama_model_cache: /opt/ollama/data  # TODO: confirm intended cache path
 ollama_image: ollama/ollama
 ollama_image_tag: latest
 ollama_container_labels: []
+ollama_container_env: {}
diff --git a/ansible/roles/ollama/templates/composefile.yml b/ansible/roles/ollama/templates/composefile.yml
index 0cb63b2..8efe36a 100644
--- a/ansible/roles/ollama/templates/composefile.yml
+++ b/ansible/roles/ollama/templates/composefile.yml
@@ -14,7 +14,13 @@ services:
       - {{ ollama_install_dir }}/data:/root/.ollama
     tty: true
     restart: unless-stopped
+{% if ollama_container_env %}
+    environment:
+      {{ ollama_container_env | to_nice_yaml | indent(6) }}
+{% endif %}
 {% if ollama_container_labels %}
     labels:
     {{ ollama_container_labels|to_nice_yaml|indent(4) }}
 {% endif %}
+
+#ollama pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
diff --git a/ansible/roles/vllm/defaults/main.yml b/ansible/roles/vllm/defaults/main.yml
new file mode 100644
index 0000000..cbbf213
--- /dev/null
+++ b/ansible/roles/vllm/defaults/main.yml
@@ -0,0 +1,19 @@
+---
+vllm_install_dir: /opt/vllm
+vllm_model_dir: "{{ vllm_install_dir }}/data"
+vllm_image: vllm/vllm-openai
+vllm_image_tag: latest
+vllm_container_labels: []
+vllm_container_env: {}
+# https://www.jan.ai/docs/desktop/jan-models/jan-code-4b
+# vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
+# DavidAU/Qwen3.5-40B-Claude-4.6-Opus-Deckard-Heretic-Uncensored-Thinking
+# DavidAU/Qwen3.5-40B-RoughHouse-Claude-4.6-Opus-Polar-Deckard-Uncensored-Heretic-Thinking
+vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
+vllm_args:
+- --enable-prefix-caching
+- --enable-auto-tool-choice
+- --reasoning-parser
+- qwen3
+- --tool-call-parser
+- qwen3_coder
diff --git a/ansible/roles/vllm/tasks/deploy/main.yml b/ansible/roles/vllm/tasks/deploy/main.yml
new file mode 100644
index 0000000..f3f64bf
--- /dev/null
+++ b/ansible/roles/vllm/tasks/deploy/main.yml
@@ -0,0 +1,18 @@
+---
+- name: Create vllm Directories
+  ansible.builtin.file:
+    state: directory
+    path: "{{ vllm_install_dir }}/{{ item }}"
+  loop:
+    - data
+    - compose
+
+- name: Pull vllm Image
+  community.docker.docker_image:
+    source: pull
+    name: "{{ vllm_image }}:{{ vllm_image_tag }}"
+
+- name: Copy composefile
+  ansible.builtin.template:
+    src: templates/composefile.yml
+    dest: "{{ vllm_install_dir }}/compose/composefile.yml"
diff --git a/ansible/roles/vllm/tasks/main.yml b/ansible/roles/vllm/tasks/main.yml
new file mode 100644
index 0000000..868ae4a
--- /dev/null
+++ b/ansible/roles/vllm/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+- name: Include selected operations for vLLM
+  tags:
+    - vllm
+  ansible.builtin.include_tasks: "{{ deployment_action|default('deploy') }}/main.yml"
diff --git a/ansible/roles/vllm/templates/composefile.yml b/ansible/roles/vllm/templates/composefile.yml
new file mode 100644
index 0000000..d54e700
--- /dev/null
+++ b/ansible/roles/vllm/templates/composefile.yml
@@ -0,0 +1,33 @@
+services:
+
+  vllm:
+    image: {{ vllm_image }}:{{ vllm_image_tag }}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    network_mode: host
+    ipc: host
+    command:
+      # first list entry is vLLM's positional argument: the model to serve
+      - {{ vllm_model }}
+{% if vllm_args %}
+      {{ vllm_args | to_nice_yaml | indent(6) }}
+{% endif %}
+    volumes:
+      - {{ vllm_model_dir }}:/root/.cache/huggingface
+    tty: true
+    restart: unless-stopped
+{% if vllm_container_env %}
+    environment:
+      {{ vllm_container_env | to_nice_yaml | indent(6) }}
+{% endif %}
+{% if vllm_container_labels %}
+    labels:
+      {{ vllm_container_labels | to_nice_yaml | indent(6) }}
+{% endif %}
+
+#vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL