From 9b11b5ece9dd771908297790669e0d71cb3a2e99 Mon Sep 17 00:00:00 2001
From: Joe
Date: Thu, 26 Mar 2026 18:15:01 +0100
Subject: [PATCH] Broke a few things, added a few more

Add llama.cpp and vLLM roles plus per-backend playbooks, mirroring the
existing ollama role layout (defaults -> tasks/deploy -> compose template).
Also thread container env vars through the ollama compose template and
fix the unquoted Jinja label expressions in the example host_vars.

---
 README.md                                     |  4 +++
 ansible/deploy-llama_cpp.yml                  |  6 ++++
 ansible/deploy-vllm.yml                       |  6 ++++
 .../inventory/example/host_vars/yourpc.yml    |  7 ++--
 ansible/roles/llama_cpp/defaults/main.yml     | 27 ++++++++++++++
 ansible/roles/llama_cpp/tasks/deploy/main.yml | 18 ++++++++++
 ansible/roles/llama_cpp/tasks/main.yml        |  5 +++
 .../roles/llama_cpp/templates/composefile.yml | 35 +++++++++++++++++++
 ansible/roles/ollama/defaults/main.yml        |  2 ++
 .../roles/ollama/templates/composefile.yml    |  6 ++++
 ansible/roles/vllm/defaults/main.yml          | 19 ++++++++++
 ansible/roles/vllm/tasks/deploy/main.yml      | 18 ++++++++++
 ansible/roles/vllm/tasks/main.yml             |  5 +++
 ansible/roles/vllm/templates/composefile.yml  | 33 +++++++++++++++++
 14 files changed, 189 insertions(+), 2 deletions(-)
 create mode 100644 ansible/deploy-llama_cpp.yml
 create mode 100644 ansible/deploy-vllm.yml
 create mode 100644 ansible/roles/llama_cpp/defaults/main.yml
 create mode 100644 ansible/roles/llama_cpp/tasks/deploy/main.yml
 create mode 100644 ansible/roles/llama_cpp/tasks/main.yml
 create mode 100644 ansible/roles/llama_cpp/templates/composefile.yml
 create mode 100644 ansible/roles/vllm/defaults/main.yml
 create mode 100644 ansible/roles/vllm/tasks/deploy/main.yml
 create mode 100644 ansible/roles/vllm/tasks/main.yml
 create mode 100644 ansible/roles/vllm/templates/composefile.yml

diff --git a/README.md b/README.md
index 8b40d3c..46df724 100644
--- a/README.md
+++ b/README.md
@@ -28,3 +28,7 @@ ansible-playbook -i inventory/yourinventory deploy-ai-at-home.yml
 
 
 This is currently a sloppy work in progress and might or not be developed in future.
+Considerations:
+vllm and llama can only provide one image per invocation, so the structure is different from ollama
+Need multiple composefiles for different invocations and configurations instead of a single one
+Requires changes in roles and so on
diff --git a/ansible/deploy-llama_cpp.yml b/ansible/deploy-llama_cpp.yml
new file mode 100644
index 0000000..d8997ed
--- /dev/null
+++ b/ansible/deploy-llama_cpp.yml
@@ -0,0 +1,6 @@
+---
+- name: Install llama.cpp
+  hosts: llama_cpp
+
+  roles:
+    - llama_cpp
diff --git a/ansible/deploy-vllm.yml b/ansible/deploy-vllm.yml
new file mode 100644
index 0000000..7569591
--- /dev/null
+++ b/ansible/deploy-vllm.yml
@@ -0,0 +1,6 @@
+---
+- name: Install vLLM
+  hosts: vllm
+
+  roles:
+    - vllm
diff --git a/ansible/inventory/example/host_vars/yourpc.yml b/ansible/inventory/example/host_vars/yourpc.yml
index 4afa0ef..c7cfc9b 100644
--- a/ansible/inventory/example/host_vars/yourpc.yml
+++ b/ansible/inventory/example/host_vars/yourpc.yml
@@ -4,5 +4,8 @@ openwebui_install_dir: "{{ ai_at_home_install_dir }}/openwebui"
 
 common_container_labels:
   - sablier.enable=true
-ollama_container_labels: {{ common_container_labels + ["sablier.group=ai_backends"] }}
-openwebui_container_labels: {{ common_container_labels + ["sablier.group=ai_frontends"] }}
+ollama_container_env:
+  OLLAMA_KV_CACHE_TYPE: q8_0
+  OLLAMA_FLASH_ATTENTION: "1"
+ollama_container_labels: "{{ common_container_labels + ['sablier.group=ai_backends'] }}"
+openwebui_container_labels: "{{ common_container_labels + ['sablier.group=ai_frontends'] }}"
diff --git a/ansible/roles/llama_cpp/defaults/main.yml b/ansible/roles/llama_cpp/defaults/main.yml
new file mode 100644
index 0000000..06263c0
--- /dev/null
+++ b/ansible/roles/llama_cpp/defaults/main.yml
@@ -0,0 +1,27 @@
+---
+llama_cpp_install_dir: /opt/llama_cpp
+llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"
+llama_cpp_image: ghcr.io/ggml-org/llama.cpp
+llama_cpp_image_tag: server-cuda
+llama_cpp_container_labels: []
+llama_cpp_container_env:
+  LLAMA_CACHE: "/hf_cache"
+# TODO: Check how llama persists data
+llama_cpp_model: unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL
+llama_cpp_args:
+- -hf
+- "{{ llama_cpp_model }}"
+- --port
+- "8090"
+- --host
+- "0.0.0.0"
+- --ctx-size
+- "16384"
+- --temp
+- "0.6"
+- --top-p
+- "0.95"
+- --top-k
+- "20"
+- --min-p
+- "0.00"
diff --git a/ansible/roles/llama_cpp/tasks/deploy/main.yml b/ansible/roles/llama_cpp/tasks/deploy/main.yml
new file mode 100644
index 0000000..4ec3c3e
--- /dev/null
+++ b/ansible/roles/llama_cpp/tasks/deploy/main.yml
@@ -0,0 +1,18 @@
+---
+- name: Create llama.cpp Directories
+  ansible.builtin.file:
+    state: directory
+    path: "{{ llama_cpp_install_dir }}/{{ item }}"
+  loop:
+    - data
+    - compose
+
+- name: Pull llama.cpp Image
+  community.docker.docker_image:
+    source: pull
+    name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"
+
+- name: Copy composefile
+  ansible.builtin.template:
+    src: templates/composefile.yml
+    dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
diff --git a/ansible/roles/llama_cpp/tasks/main.yml b/ansible/roles/llama_cpp/tasks/main.yml
new file mode 100644
index 0000000..0f17a3f
--- /dev/null
+++ b/ansible/roles/llama_cpp/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+- name: Include selected operations for llama.cpp
+  tags:
+    - llama_cpp
+  ansible.builtin.include_tasks: "{{ deployment_action|default('deploy') }}/main.yml"
diff --git a/ansible/roles/llama_cpp/templates/composefile.yml b/ansible/roles/llama_cpp/templates/composefile.yml
new file mode 100644
index 0000000..0f34339
--- /dev/null
+++ b/ansible/roles/llama_cpp/templates/composefile.yml
@@ -0,0 +1,35 @@
+services:
+
+  llama_cpp:
+    image: {{ llama_cpp_image }}:{{ llama_cpp_image_tag }}
+
+{% if llama_cpp_args %}
+    command:
+      {{ llama_cpp_args | to_nice_yaml | indent(6) }}
+{% endif %}
+
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    network_mode: host
+    volumes:
+      - {{ llama_cpp_data_dir }}/models:/models
+      - {{ llama_cpp_data_dir }}/hf_cache:/hf_cache
+    tty: true
+    restart: unless-stopped
+
+{% if llama_cpp_container_env %}
+    environment:
+      {{ llama_cpp_container_env | to_nice_yaml | indent(6) }}
+{% endif %}
+
+{% if llama_cpp_container_labels %}
+    labels:
+      {{ llama_cpp_container_labels | to_nice_yaml | indent(6) }}
+{% endif %}
+
+#llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
diff --git a/ansible/roles/ollama/defaults/main.yml b/ansible/roles/ollama/defaults/main.yml
index d8f16c2..4a33e10 100644
--- a/ansible/roles/ollama/defaults/main.yml
+++ b/ansible/roles/ollama/defaults/main.yml
@@ -1,5 +1,7 @@
 ---
 ollama_install_dir: /opt/ollama
+ollama_model_cache: /opt/ollama/data  # TODO: confirm intended cache path
 ollama_image: ollama/ollama
 ollama_image_tag: latest
 ollama_container_labels: []
+ollama_container_env: {}
diff --git a/ansible/roles/ollama/templates/composefile.yml b/ansible/roles/ollama/templates/composefile.yml
index 0cb63b2..8efe36a 100644
--- a/ansible/roles/ollama/templates/composefile.yml
+++ b/ansible/roles/ollama/templates/composefile.yml
@@ -14,7 +14,13 @@ services:
       - {{ ollama_install_dir }}/data:/root/.ollama
     tty: true
     restart: unless-stopped
+{% if ollama_container_env %}
+    environment:
+      {{ ollama_container_env | to_nice_yaml | indent(6) }}
+{% endif %}
 {% if ollama_container_labels %}
     labels:
     {{ ollama_container_labels|to_nice_yaml|indent(4) }}
 {% endif %}
+
+#ollama pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
diff --git a/ansible/roles/vllm/defaults/main.yml b/ansible/roles/vllm/defaults/main.yml
new file mode 100644
index 0000000..cbbf213
--- /dev/null
+++ b/ansible/roles/vllm/defaults/main.yml
@@ -0,0 +1,19 @@
+---
+vllm_install_dir: /opt/vllm
+vllm_model_dir: "{{ vllm_install_dir }}/data"
+vllm_image: vllm/vllm-openai
+vllm_image_tag: latest
+vllm_container_labels: []
+vllm_container_env: {}
+# https://www.jan.ai/docs/desktop/jan-models/jan-code-4b
+# vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
+# DavidAU/Qwen3.5-40B-Claude-4.6-Opus-Deckard-Heretic-Uncensored-Thinking
+# DavidAU/Qwen3.5-40B-RoughHouse-Claude-4.6-Opus-Polar-Deckard-Uncensored-Heretic-Thinking
+vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
+vllm_args:
+- --enable-prefix-caching
+- --enable-auto-tool-choice
+- --reasoning-parser
+- qwen3
+- --tool-call-parser
+- qwen3_coder
diff --git a/ansible/roles/vllm/tasks/deploy/main.yml b/ansible/roles/vllm/tasks/deploy/main.yml
new file mode 100644
index 0000000..f3f64bf
--- /dev/null
+++ b/ansible/roles/vllm/tasks/deploy/main.yml
@@ -0,0 +1,18 @@
+---
+- name: Create vllm Directories
+  ansible.builtin.file:
+    state: directory
+    path: "{{ vllm_install_dir }}/{{ item }}"
+  loop:
+    - data
+    - compose
+
+- name: Pull vllm Image
+  community.docker.docker_image:
+    source: pull
+    name: "{{ vllm_image }}:{{ vllm_image_tag }}"
+
+- name: Copy composefile
+  ansible.builtin.template:
+    src: templates/composefile.yml
+    dest: "{{ vllm_install_dir }}/compose/composefile.yml"
diff --git a/ansible/roles/vllm/tasks/main.yml b/ansible/roles/vllm/tasks/main.yml
new file mode 100644
index 0000000..868ae4a
--- /dev/null
+++ b/ansible/roles/vllm/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+- name: Include selected operations for vLLM
+  tags:
+    - vllm
+  ansible.builtin.include_tasks: "{{ deployment_action|default('deploy') }}/main.yml"
diff --git a/ansible/roles/vllm/templates/composefile.yml b/ansible/roles/vllm/templates/composefile.yml
new file mode 100644
index 0000000..d54e700
--- /dev/null
+++ b/ansible/roles/vllm/templates/composefile.yml
@@ -0,0 +1,33 @@
+services:
+
+  vllm:
+    image: {{ vllm_image }}:{{ vllm_image_tag }}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    network_mode: host
+    ipc: host
+    command:
+      # first list entry is vLLM's positional argument: the model to serve
+      - {{ vllm_model }}
+{% if vllm_args %}
+      {{ vllm_args | to_nice_yaml | indent(6) }}
+{% endif %}
+    volumes:
+      - {{ vllm_model_dir }}:/root/.cache/huggingface
+    tty: true
+    restart: unless-stopped
+{% if vllm_container_env %}
+    environment:
+      {{ vllm_container_env | to_nice_yaml | indent(6) }}
+{% endif %}
+{% if vllm_container_labels %}
+    labels:
+      {{ vllm_container_labels | to_nice_yaml | indent(6) }}
+{% endif %}
+
+#vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL