# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Model launch definitions: each key under `models` is a model ID whose entry
# carries a display name, the llama-server command line, and a ttl value.
# NOTE(review): the key layout and the ${PORT} placeholder look like a
# llama-swap config - confirm against the consuming tool.
models:
  "qwen3-4b-thinking":
    name: "Qwen3 4B Thinking Q4.0"
    # NOTE(review): unlike the other two entries, this command quantizes the
    # KV cache without passing `--flash-attn on` - confirm the server build
    # enables flash attention on its own.
    cmd: |
      /app/llama-server \
      --model /models/Qwen3-4B-Thinking-2507-Q4_0.gguf \
      --host 0.0.0.0 \
      --port ${PORT} \
      --no-mmap \
      --cache-type-k q4_0 \
      --cache-type-v q4_0 \
      --cache-reuse 512 \
      --no-webui
    # presumably idle seconds before the model is unloaded - confirm
    ttl: 5

  "gpt-oss-20b-thinking":
    name: "GPT OSS 20B Thinking Q4.0 16K ctx"
    # Every non-final argument line ends with a backslash so the command stays
    # one logical line even if the consumer hands it to a shell.
    cmd: |
      /app/llama-server \
      --model /models/gpt-oss-20b-Q4_0.gguf \
      --host 0.0.0.0 \
      --port ${PORT} \
      --cache-type-k q4_0 \
      --cache-type-v q4_0 \
      --flash-attn on \
      --cache-reuse 512 \
      --ctx-size 16384 \
      --n-gpu-layers 10 \
      --n-cpu-moe 14 \
      --no-webui
    # presumably idle seconds before the model is unloaded - confirm
    ttl: 5

  "ministral-14b-thinking":
    name: "Ministral 3 14B Thinking"
    cmd: |
      /app/llama-server \
      --model /models/Ministral-3-14B-Reasoning-2512-Q4_0.gguf \
      --host 0.0.0.0 \
      --port ${PORT} \
      --cache-type-k q4_0 \
      --cache-type-v q4_0 \
      --flash-attn on \
      --cache-reuse 512 \
      --ctx-size 8192 \
      --n-gpu-layers 35 \
      --no-mmap \
      --parallel 1 \
      --no-webui
    # presumably idle seconds before the model is unloaded - confirm
    ttl: 120
# Advertisement
# Add Comment
# Please sign in to add a comment