Broke a few things, added a few more
This commit is contained in:
@@ -28,3 +28,7 @@ ansible-playbook -i inventory/yourinventory deploy-ai-at-home.yml
|
|||||||
|
|
||||||
This is currently a sloppy work in progress and might or might not be developed in the future.
|
This is currently a sloppy work in progress and might or might not be developed in the future.
|
||||||
|
|
||||||
|
Considerations:
|
||||||
|
vllm and llama can only provide one image per invocation, so the structure is different from ollama
|
||||||
|
Need multiple composefiles for different invocations and configurations instead of a single one
|
||||||
|
Requires changes in roles and so on
|
||||||
6
ansible/deploy-llama_cpp.yml
Normal file
6
ansible/deploy-llama_cpp.yml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
---
|
||||||
|
- name: Install llama.cpp
|
||||||
|
hosts: llama_cpp
|
||||||
|
|
||||||
|
roles:
|
||||||
|
- llama_cpp
|
||||||
6
ansible/deploy-vllm.yml
Normal file
6
ansible/deploy-vllm.yml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
---
|
||||||
|
- name: Install vLLM
|
||||||
|
hosts: vllm
|
||||||
|
|
||||||
|
roles:
|
||||||
|
- vllm
|
||||||
@@ -4,5 +4,8 @@ openwebui_install_dir: "{{ ai_at_home_install_dir }}/openwebui"
|
|||||||
|
|
||||||
common_container_labels:
|
common_container_labels:
|
||||||
- sablier.enable=true
|
- sablier.enable=true
|
||||||
ollama_container_labels: {{ common_container_labels + ["sablier.group=ai_backends"] }}
|
ollama_container_env:
|
||||||
openwebui_container_labels: {{ common_container_labels + ["sablier.group=ai_frontends"] }}
|
OLLAMA_KV_CACHE_TYPE: q8_0
|
||||||
|
OLLAMA_FLASH_ATTENTION: 1
|
||||||
|
ollama_container_labels: "{{ common_container_labels + ['sablier.group=ai_backends'] }}"
|
||||||
|
openwebui_container_labels: "{{ common_container_labels + ['sablier.group=ai_frontends'] }}"
|
||||||
|
|||||||
27
ansible/roles/llama_cpp/defaults/main.yml
Normal file
27
ansible/roles/llama_cpp/defaults/main.yml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
---
|
||||||
|
llama_cpp_install_dir: /opt/llama_cpp
|
||||||
|
llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"
|
||||||
|
llama_cpp_image: ghcr.io/ggml-org/llama.cpp
|
||||||
|
llama_cpp_image_tag: server-cuda
|
||||||
|
llama_cpp_container_labels: []
|
||||||
|
llama_cpp_container_env:
|
||||||
|
LLAMA_CACHE: "/hf_cache"
|
||||||
|
# TODO: Check how llama persists data
|
||||||
|
llama_cpp_model: unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL
|
||||||
|
llama_cpp_args:
|
||||||
|
- -hf
|
||||||
|
- "{{ llama_cpp_model }}"
|
||||||
|
- --port
|
||||||
|
- "8090"
|
||||||
|
- --host
|
||||||
|
- "0.0.0.0"
|
||||||
|
- --ctx-size
|
||||||
|
- "16384"
|
||||||
|
- --temp
|
||||||
|
- "0.6"
|
||||||
|
- --top-p
|
||||||
|
- "0.95"
|
||||||
|
- --top-k
|
||||||
|
- "20"
|
||||||
|
- --min-p
|
||||||
|
- "0.00"
|
||||||
18
ansible/roles/llama_cpp/tasks/deploy/main.yml
Normal file
18
ansible/roles/llama_cpp/tasks/deploy/main.yml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
---
|
||||||
|
- name: Create llama.cpp Directories
|
||||||
|
ansible.builtin.file:
|
||||||
|
state: directory
|
||||||
|
path: "{{ llama_cpp_install_dir }}/{{ item }}"
|
||||||
|
loop:
|
||||||
|
- data
|
||||||
|
- compose
|
||||||
|
|
||||||
|
- name: Pull llama.cpp Image
|
||||||
|
community.docker.docker_image:
|
||||||
|
source: pull
|
||||||
|
name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"
|
||||||
|
|
||||||
|
- name: Copy composefile
|
||||||
|
ansible.builtin.template:
|
||||||
|
src: templates/composefile.yml
|
||||||
|
dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
|
||||||
5
ansible/roles/llama_cpp/tasks/main.yml
Normal file
5
ansible/roles/llama_cpp/tasks/main.yml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
- name: Include selected operations for llama.cpp
|
||||||
|
tags:
|
||||||
|
- llama_cpp
|
||||||
|
ansible.builtin.include_tasks: "{{ deployment_action|default('deploy') }}/main.yml"
|
||||||
35
ansible/roles/llama_cpp/templates/composefile.yml
Normal file
35
ansible/roles/llama_cpp/templates/composefile.yml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
services:
|
||||||
|
|
||||||
|
llama_cpp:
|
||||||
|
image: {{ llama_cpp_image }}:{{ llama_cpp_image_tag }}
|
||||||
|
|
||||||
|
{% if llama_cpp_args %}
|
||||||
|
command:
|
||||||
|
{{ llama_cpp_args|to_nice_yaml|indent(4) }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities: [gpu]
|
||||||
|
network_mode: host
|
||||||
|
volumes:
|
||||||
|
- {{ llama_cpp_data_dir }}/models:/models
|
||||||
|
# Mount the host cache at the directory LLAMA_CACHE points to, so that models
# downloaded via -hf land on the persistent volume; falls back to /root/.cache
# when LLAMA_CACHE is not set in llama_cpp_container_env.
- {{ llama_cpp_data_dir }}/hf_cache:{{ llama_cpp_container_env.LLAMA_CACHE | default('/root/.cache') }}
|
||||||
|
tty: true
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
{% if llama_cpp_container_env %}
|
||||||
|
environment:
|
||||||
|
{{ llama_cpp_container_env|to_nice_yaml|indent(6) }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if llama_cpp_container_labels %}
|
||||||
|
labels:
|
||||||
|
{{ llama_cpp_container_labels|to_nice_yaml|indent(4) }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
#llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
---
|
---
|
||||||
ollama_install_dir: /opt/ollama
|
ollama_install_dir: /opt/ollama
|
||||||
|
ollama_model_cache: /opt/ollama/data #?
|
||||||
ollama_image: ollama/ollama
|
ollama_image: ollama/ollama
|
||||||
ollama_image_tag: latest
|
ollama_image_tag: latest
|
||||||
ollama_container_labels: []
|
ollama_container_labels: []
|
||||||
|
ollama_container_env: {}
|
||||||
@@ -14,7 +14,13 @@ services:
|
|||||||
- {{ ollama_install_dir }}/data:/root/.ollama
|
- {{ ollama_install_dir }}/data:/root/.ollama
|
||||||
tty: true
|
tty: true
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
{% if ollama_container_env %}
|
||||||
|
environment:
|
||||||
|
{{ ollama_container_env|to_nice_yaml|indent(6) }}
|
||||||
|
{% endif %}
|
||||||
{% if ollama_container_labels %}
|
{% if ollama_container_labels %}
|
||||||
labels:
|
labels:
|
||||||
{{ ollama_container_labels|to_nice_yaml|indent(4) }}
|
{{ ollama_container_labels|to_nice_yaml|indent(4) }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
#ollama pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
|
||||||
19
ansible/roles/vllm/defaults/main.yml
Normal file
19
ansible/roles/vllm/defaults/main.yml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
vllm_install_dir: /opt/vllm
|
||||||
|
vllm_model_dir: "{{ vllm_install_dir }}/data"
|
||||||
|
vllm_image: vllm/vllm-openai
|
||||||
|
vllm_image_tag: latest
|
||||||
|
vllm_container_labels: []
|
||||||
|
vllm_container_env: {}
|
||||||
|
#https://www.jan.ai/docs/desktop/jan-models/jan-code-4b
|
||||||
|
# vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
|
||||||
|
#DavidAU/Qwen3.5-40B-Claude-4.6-Opus-Deckard-Heretic-Uncensored-Thinking
|
||||||
|
#DavidAU/Qwen3.5-40B-RoughHouse-Claude-4.6-Opus-Polar-Deckard-Uncensored-Heretic-Thinking
|
||||||
|
vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
|
||||||
|
vllm_args:
|
||||||
|
- --enable-prefix-caching
|
||||||
|
- --enable-auto-tool-choice
|
||||||
|
- --reasoning-parser
|
||||||
|
- qwen3
|
||||||
|
- --tool-call-parser
|
||||||
|
- qwen3_coder
|
||||||
18
ansible/roles/vllm/tasks/deploy/main.yml
Normal file
18
ansible/roles/vllm/tasks/deploy/main.yml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
---
|
||||||
|
- name: Create vllm Directories
|
||||||
|
ansible.builtin.file:
|
||||||
|
state: directory
|
||||||
|
path: "{{ vllm_install_dir }}/{{ item }}"
|
||||||
|
loop:
|
||||||
|
- data
|
||||||
|
- compose
|
||||||
|
|
||||||
|
- name: Pull vllm Image
|
||||||
|
community.docker.docker_image:
|
||||||
|
source: pull
|
||||||
|
name: "{{ vllm_image }}:{{ vllm_image_tag }}"
|
||||||
|
|
||||||
|
- name: Copy composefile
|
||||||
|
ansible.builtin.template:
|
||||||
|
src: templates/composefile.yml
|
||||||
|
dest: "{{ vllm_install_dir }}/compose/composefile.yml"
|
||||||
5
ansible/roles/vllm/tasks/main.yml
Normal file
5
ansible/roles/vllm/tasks/main.yml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
- name: Include selected operations for vLLM
|
||||||
|
tags:
|
||||||
|
- vllm
|
||||||
|
ansible.builtin.include_tasks: "{{ deployment_action|default('deploy') }}/main.yml"
|
||||||
33
ansible/roles/vllm/templates/composefile.yml
Normal file
33
ansible/roles/vllm/templates/composefile.yml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
services:
|
||||||
|
|
||||||
|
vllm:
|
||||||
|
image: {{ vllm_image }}:{{ vllm_image_tag }}
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities: [gpu]
|
||||||
|
network_mode: host
|
||||||
|
ipc: host
|
||||||
|
command:
|
||||||
|
# TODO: Fix indentation... again
|
||||||
|
- {{ vllm_model }}
|
||||||
|
{% if vllm_args %}
|
||||||
|
{{ vllm_args|to_nice_yaml|indent(4) }}
|
||||||
|
{% endif %}
|
||||||
|
volumes:
|
||||||
|
- {{ vllm_model_dir }}:/root/.cache/huggingface
|
||||||
|
tty: true
|
||||||
|
restart: unless-stopped
|
||||||
|
{% if vllm_container_env %}
|
||||||
|
environment:
|
||||||
|
{{ vllm_container_env|to_nice_yaml|indent(6) }}
|
||||||
|
{% endif %}
|
||||||
|
{% if vllm_container_labels %}
|
||||||
|
labels:
|
||||||
|
{{ vllm_container_labels|to_nice_yaml|indent(4) }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
#vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
|
||||||
Reference in New Issue
Block a user