Broke a few things, added a few more

2026-03-26 18:15:01 +01:00
parent f3ce5a8880
commit 9b11b5ece9
14 changed files with 189 additions and 2 deletions
@@ -28,3 +28,7 @@ ansible-playbook -i inventory/yourinventory deploy-ai-at-home.yml
 This is currently a sloppy work in progress and might or might not be developed in the future.
 Considerations:
 vllm and llama can only provide one image per invocation, so the structure is different from ollama
 Need multiple composefiles for different invocations and configurations instead of a single one
 Requires changes in roles and so on
@@ -0,0 +1,6 @@
 ---
 # Playbook: applies the llama_cpp role to the hosts matched by the
 # `llama_cpp` inventory pattern.
 - name: Install llama.cpp
  hosts: llama_cpp
  roles:
  - role: llama_cpp
@@ -0,0 +1,6 @@
 ---
 # Playbook: applies the vllm role to the hosts matched by the `vllm`
 # inventory pattern.
 - name: Install vLLM
  hosts: vllm
  roles:
  - role: vllm
@@ -4,5 +4,8 @@ openwebui_install_dir: "{{ ai_at_home_install_dir }}/openwebui"
 # Labels shared by the containers; the ollama and openwebui label lists
 # below are built by appending a service-specific sablier.group to this list.
 common_container_labels:
 - sablier.enable=true
-ollama_container_labels: {{ common_container_labels + ["sablier.group=ai_backends"] }}
+ollama_container_env:
-openwebui_container_labels: {{ common_container_labels + ["sablier.group=ai_frontends"] }}
+  OLLAMA_KV_CACHE_TYPE: q8_0
  OLLAMA_FLASH_ATTENTION: 1
 ollama_container_labels: "{{ common_container_labels + ['sablier.group=ai_backends'] }}"
 openwebui_container_labels: "{{ common_container_labels + ['sablier.group=ai_frontends'] }}"
@@ -0,0 +1,27 @@
 ---
 # Default variables for the llama_cpp role.
 llama_cpp_install_dir: /opt/llama_cpp
 llama_cpp_data_dir: "{{ llama_cpp_install_dir }}/data"
 llama_cpp_image: ghcr.io/ggml-org/llama.cpp
 llama_cpp_image_tag: server-cuda
 llama_cpp_container_labels: []
 # Environment variables rendered into the service's `environment:` block.
 llama_cpp_container_env:
  # llama.cpp uses LLAMA_CACHE as its download cache directory. The composefile
  # template bind-mounts <llama_cpp_data_dir>/hf_cache at /root/.cache, so the
  # cache has to live below that path; otherwise models fetched with -hf are
  # written to the container's own filesystem and lost when it is recreated.
  LLAMA_CACHE: "/root/.cache/llama.cpp"
 # Hugging Face repository (and quantisation tag) handed to -hf below.
 llama_cpp_model: unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL
 # Arguments rendered as the container `command:` list, one token per item.
 llama_cpp_args:
 - -hf
 - "{{ llama_cpp_model }}"
 - --port
 - "8090"
 - --host
 - "0.0.0.0"
 - --ctx-size
 - "16384"
 - --temp
 - "0.6"
 - --top-p
 - "0.95"
 - --top-k
 - "20"
 - --min-p
 - "0.00"
@@ -0,0 +1,18 @@
 ---
 # Prepares a llama.cpp deployment: creates the data/compose directories,
 # pulls the container image and renders the composefile. No task in this
 # file starts the compose stack.
 - name: Create llama.cpp Directories
  loop: [data, compose]
  ansible.builtin.file:
    path: "{{ llama_cpp_install_dir }}/{{ item }}"
    state: directory
 - name: Pull llama.cpp Image
  community.docker.docker_image:
    name: "{{ llama_cpp_image }}:{{ llama_cpp_image_tag }}"
    source: pull
 - name: Copy composefile
  ansible.builtin.template:
    dest: "{{ llama_cpp_install_dir }}/compose/composefile.yml"
    src: templates/composefile.yml
@@ -0,0 +1,5 @@
 ---
 # Role entry point: includes <deployment_action>/main.yml, falling back to
 # deploy/main.yml when deployment_action is not defined.
 - name: Include selected operations for llama.cpp
  ansible.builtin.include_tasks:
    file: "{{ deployment_action | default('deploy') }}/main.yml"
  tags: [llama_cpp]
@@ -0,0 +1,35 @@
 {#
   Compose service definition for the llama.cpp server container.

   Block tags and comments start at column 0 of this template: Ansible renders
   templates with trim_blocks enabled and lstrip_blocks disabled, so any
   whitespace in front of a tag is emitted and would shift the indentation of
   the line that follows it.

   The indent filter does not indent the first line of its input, so each
   expression is placed at the column its first line must have and the filter
   width matches that column. trim removes the newline that to_nice_yaml appends.
 #}
 services:
  llama_cpp:
    image: {{ llama_cpp_image }}:{{ llama_cpp_image_tag }}
 {% if llama_cpp_args %}
    command:
    {{ llama_cpp_args | to_nice_yaml | trim | indent(4) }}
 {% endif %}
    deploy:
      resources:
        reservations:
          devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
    network_mode: host
    volumes:
    - {{ llama_cpp_data_dir }}/models:/models
    - {{ llama_cpp_data_dir }}/hf_cache:/root/.cache
    tty: true
    restart: unless-stopped
 {% if llama_cpp_container_env %}
    environment:
      {{ llama_cpp_container_env | to_nice_yaml | trim | indent(6) }}
 {% endif %}
 {% if llama_cpp_container_labels %}
    labels:
    {{ llama_cpp_container_labels | to_nice_yaml | trim | indent(4) }}
 {% endif %}
 #llama_cpp pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
@@ -1,5 +1,7 @@
 ---
 # Default variables for the ollama role.
 ollama_install_dir: /opt/ollama
 # NOTE(review): the visible part of the compose template mounts
 # {{ ollama_install_dir }}/data rather than this variable — confirm whether
 # ollama_model_cache is still used anywhere.
 ollama_model_cache: /opt/ollama/data #?
 ollama_image: ollama/ollama
 ollama_image_tag: latest
 # Empty by default; the compose template only renders its labels/environment
 # sections when these are non-empty.
 ollama_container_labels: []
 ollama_container_env: {}
@@ -14,7 +14,13 @@ services:
    - {{ ollama_install_dir }}/data:/root/.ollama
    tty: true
    restart: unless-stopped
 {#
   Block tags and comments start at column 0: Ansible renders templates with
   trim_blocks enabled and lstrip_blocks disabled, so whitespace in front of a
   tag would be emitted and shift the indentation of the following line.
   The indent filter leaves its first line untouched, so each expression sits
   at the column its first line needs and the filter width matches it.
 #}
 {% if ollama_container_env %}
    environment:
      {{ ollama_container_env | to_nice_yaml | trim | indent(6) }}
 {% endif %}
 {% if ollama_container_labels %}
    labels:
    {{ ollama_container_labels | to_nice_yaml | trim | indent(4) }}
 {% endif %}
 #ollama pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL
@@ -0,0 +1,19 @@
 ---
 # Default variables for the vllm role.
 vllm_install_dir: /opt/vllm
 # Host directory that the compose template mounts at
 # /root/.cache/huggingface inside the container.
 vllm_model_dir: "{{ vllm_install_dir }}/data"
 vllm_image: vllm/vllm-openai
 vllm_image_tag: latest
 vllm_container_labels: []
 vllm_container_env: {}
 # Alternative models kept for reference:
 # https://www.jan.ai/docs/desktop/jan-models/jan-code-4b
 # vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
 # DavidAU/Qwen3.5-40B-Claude-4.6-Opus-Deckard-Heretic-Uncensored-Thinking
 # DavidAU/Qwen3.5-40B-RoughHouse-Claude-4.6-Opus-Polar-Deckard-Uncensored-Heretic-Thinking
 # Model passed as the first item of the container command.
 vllm_model: "unsloth/Qwen3.5-35B-A3B-GGUF"
 # Extra arguments appended to the container command after the model,
 # one token per list item.
 vllm_args:
 - --enable-prefix-caching
 - --enable-auto-tool-choice
 - --reasoning-parser
 - qwen3
 - --tool-call-parser
 - qwen3_coder
@@ -0,0 +1,18 @@
 ---
 # Prepares a vLLM deployment: creates the data/compose directories, pulls
 # the container image and renders the composefile. No task in this file
 # starts the compose stack.
 - name: Create vllm Directories
  loop: [data, compose]
  ansible.builtin.file:
    path: "{{ vllm_install_dir }}/{{ item }}"
    state: directory
 - name: Pull vllm Image
  community.docker.docker_image:
    name: "{{ vllm_image }}:{{ vllm_image_tag }}"
    source: pull
 - name: Copy composefile
  ansible.builtin.template:
    dest: "{{ vllm_install_dir }}/compose/composefile.yml"
    src: templates/composefile.yml
@@ -0,0 +1,5 @@
 ---
 # Role entry point: includes <deployment_action>/main.yml, falling back to
 # deploy/main.yml when deployment_action is not defined.
 - name: Include selected operations for vLLM
  ansible.builtin.include_tasks:
    file: "{{ deployment_action | default('deploy') }}/main.yml"
  tags: [vllm]
@@ -0,0 +1,33 @@
 {#
   Compose service definition for the vLLM container.

   Block tags and comments start at column 0 of this template: Ansible renders
   templates with trim_blocks enabled and lstrip_blocks disabled, so any
   whitespace in front of a tag is emitted and would shift the indentation of
   the line that follows it.

   The indent filter does not indent the first line of its input, so each
   expression is placed at the column its first line must have and the filter
   width matches that column. trim removes the newline that to_nice_yaml appends.
 #}
 services:
  vllm:
    image: {{ vllm_image }}:{{ vllm_image_tag }}
    deploy:
      resources:
        reservations:
          devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
    network_mode: host
    ipc: host
    command:
    - {{ vllm_model }}
 {% if vllm_args %}
    {{ vllm_args | to_nice_yaml | trim | indent(4) }}
 {% endif %}
    volumes:
    - {{ vllm_model_dir }}:/root/.cache/huggingface
    tty: true
    restart: unless-stopped
 {% if vllm_container_env %}
    environment:
      {{ vllm_container_env | to_nice_yaml | trim | indent(6) }}
 {% endif %}
 {% if vllm_container_labels %}
    labels:
    {{ vllm_container_labels | to_nice_yaml | trim | indent(4) }}
 {% endif %}
 #vllm pull hf.co/unsloth/Apriel-1.5-15b-Thinker-GGUF:UD-Q4_K_XL