Broke a few things, added a few more

This commit is contained in:
2026-03-26 18:15:01 +01:00
parent f3ce5a8880
commit 9b11b5ece9
14 changed files with 189 additions and 2 deletions

View File

@@ -0,0 +1,27 @@
---
# Installation layout for the llama.cpp role.
llama_cpp_install_dir: /opt/llama_cpp
llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"

# Container image to deploy.
llama_cpp_image: ghcr.io/ggml-org/llama.cpp
llama_cpp_image_tag: server-cuda

llama_cpp_container_labels: []
llama_cpp_container_env:
  # Directory inside the container where llama.cpp stores downloaded models.
  # NOTE(review): the compose template must bind-mount this exact path or the
  # cache is lost when the container is recreated — TODO confirm persistence.
  LLAMA_CACHE: "/hf_cache"

# Hugging Face model reference passed to `-hf` (repo:quant).
llama_cpp_model: unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL

# Listen port of the llama.cpp server. Extracted into its own variable so
# inventories can override the port without redefining the whole arg list.
llama_cpp_port: 8090

# Full argument vector for the server process (rendered into the composefile).
llama_cpp_args:
  - -hf
  - "{{ llama_cpp_model }}"
  - --port
  - "{{ llama_cpp_port }}"
  - --host
  - "0.0.0.0"
  - --ctx-size
  - "16384"
  - --temp
  - "0.6"
  - --top-p
  - "0.95"
  - --top-k
  - "20"
  - --min-p
  - "0.00"

View File

@@ -0,0 +1,18 @@
---
# Deploy-time tasks for the llama.cpp role: prepare directories, pull the
# image, and render the compose file. (Starting the stack is not done here.)
- name: Create llama.cpp Directories
  ansible.builtin.file:
    state: directory
    path: "{{ llama_cpp_install_dir }}/{{ item }}"
    # Explicit mode — otherwise the result depends on the remote umask.
    mode: "0755"
  loop:
    - data
    - compose

- name: Pull llama.cpp Image
  community.docker.docker_image:
    source: pull
    name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"

- name: Copy composefile
  ansible.builtin.template:
    src: templates/composefile.yml
    dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
    mode: "0644"

View File

@@ -0,0 +1,5 @@
---
# Role entry point: dispatch to the task file matching the requested
# deployment_action (defaults to "deploy").
- name: Include selected operations for llama.cpp
  tags:
    - llama_cpp
  ansible.builtin.include_tasks:
    file: "{{ deployment_action | default('deploy') }}/main.yml"
    # Dynamic includes do not propagate tags to included tasks; without
    # `apply`, running with `--tags llama_cpp` would execute the include
    # itself but skip everything inside it.
    apply:
      tags:
        - llama_cpp

View File

@@ -0,0 +1,35 @@
services:
  llama_cpp:
    image: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"
{% if llama_cpp_args %}
    command:
      {{ llama_cpp_args | to_nice_yaml | indent(6) }}
{% endif %}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    network_mode: host
    volumes:
      - "{{ llama_cpp_data_dir }}/models:/models"
      # LLAMA_CACHE (role defaults) points at /hf_cache inside the container,
      # so the cache volume must be mounted there — mounting it at
      # /root/.cache left the actual cache path unpersisted.
      - "{{ llama_cpp_data_dir }}/hf_cache:/hf_cache"
    tty: true
    restart: unless-stopped
{% if llama_cpp_container_env %}
    environment:
      {{ llama_cpp_container_env | to_nice_yaml | indent(6) }}
{% endif %}
{% if llama_cpp_container_labels %}
    labels:
      {{ llama_cpp_container_labels | to_nice_yaml | indent(6) }}
{% endif %}
# Manual pull example (inside the container):
#   llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL

View File

@@ -1,5 +1,7 @@
---
ollama_install_dir: /opt/ollama
# Derive the cache path from the install dir (same rendered value as before)
# so that overriding ollama_install_dir relocates the cache too — consistent
# with the llama_cpp and vllm roles.
# TODO: verify where ollama actually persists its models.
ollama_model_cache: "{{ ollama_install_dir }}/data"
ollama_image: ollama/ollama
ollama_image_tag: latest
ollama_container_labels: []
ollama_container_env: {}

View File

@@ -14,7 +14,13 @@ services:
- {{ ollama_install_dir }}/data:/root/.ollama
tty: true
restart: unless-stopped
{% if ollama_container_env %}
environment:
{{ ollama_container_env|to_nice_yaml|indent(6) }}
{% endif %}
{% if ollama_container_labels %}
labels:
{{ ollama_container_labels|to_nice_yaml|indent(4) }}
{% endif %}
#ollama pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL

View File

@@ -0,0 +1,19 @@
---
# Installation layout for the vLLM role.
vllm_install_dir: /opt/vllm
# Bind-mounted into the container as the Hugging Face cache (see template).
vllm_model_dir: "{{ vllm_install_dir }}/data"
# Container image to deploy.
vllm_image: vllm/vllm-openai
vllm_image_tag: latest
vllm_container_labels: []
vllm_container_env: {}
# https://www.jan.ai/docs/desktop/jan-models/jan-code-4b
# vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
# Other candidate models considered:
#   DavidAU/Qwen3.5-40B-Claude-4.6-Opus-Deckard-Heretic-Uncensored-Thinking
#   DavidAU/Qwen3.5-40B-RoughHouse-Claude-4.6-Opus-Polar-Deckard-Uncensored-Heretic-Thinking
# Model identifier: rendered as the first positional argument in the
# composefile's `command` list.
vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
# Extra CLI arguments appended after the model in the composefile template.
vllm_args:
  - --enable-prefix-caching
  - --enable-auto-tool-choice
  - --reasoning-parser
  - qwen3
  - --tool-call-parser
  - qwen3_coder

View File

@@ -0,0 +1,18 @@
---
# Deploy-time tasks for the vLLM role: prepare directories, pull the image,
# and render the compose file. (Starting the stack is not done here.)
- name: Create vllm Directories
  ansible.builtin.file:
    state: directory
    path: "{{ vllm_install_dir }}/{{ item }}"
    # Explicit mode — otherwise the result depends on the remote umask.
    mode: "0755"
  loop:
    - data
    - compose

- name: Pull vllm Image
  community.docker.docker_image:
    source: pull
    name: "{{ vllm_image }}:{{ vllm_image_tag }}"

- name: Copy composefile
  ansible.builtin.template:
    src: templates/composefile.yml
    dest: "{{ vllm_install_dir }}/compose/composefile.yml"
    mode: "0644"

View File

@@ -0,0 +1,5 @@
---
# Role entry point: dispatch to the task file matching the requested
# deployment_action (defaults to "deploy").
- name: Include selected operations for vLLM
  tags:
    - vllm
  ansible.builtin.include_tasks:
    file: "{{ deployment_action | default('deploy') }}/main.yml"
    # Dynamic includes do not propagate tags to included tasks; without
    # `apply`, running with `--tags vllm` would execute the include itself
    # but skip everything inside it.
    apply:
      tags:
        - vllm

View File

@@ -0,0 +1,33 @@
services:
  vllm:
    image: "{{ vllm_image }}:{{ vllm_image_tag }}"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    network_mode: host
    # NOTE(review): vLLM's docker docs recommend host IPC so workers can use
    # shared memory beyond the container's default /dev/shm size.
    ipc: host
    command:
      # First positional argument is the model; extra args follow. The args
      # list is injected with indent(6) so every rendered `- …` item lines up
      # with the literal item above (the old indent(4) produced invalid YAML).
      - "{{ vllm_model }}"
{% if vllm_args %}
      {{ vllm_args | to_nice_yaml | indent(6) }}
{% endif %}
    volumes:
      - "{{ vllm_model_dir }}:/root/.cache/huggingface"
    tty: true
    restart: unless-stopped
{% if vllm_container_env %}
    environment:
      {{ vllm_container_env | to_nice_yaml | indent(6) }}
{% endif %}
{% if vllm_container_labels %}
    labels:
      {{ vllm_container_labels | to_nice_yaml | indent(6) }}
{% endif %}
# Manual pull example:
#   vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL