Guest User

Untitled

a guest
Dec 16th, 2025
33
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.20 KB | None | 0 0
  # Model catalogue: each key under `models` is a model id mapped to a display
  # `name`, the llama-server command line (`cmd`) and a `ttl`.
  # Layout matches a llama-swap style config -- TODO confirm the consumer.
  #
  # Conventions used by every `cmd` below:
  #   * `cmd` is a literal block scalar (`|`), so line breaks are kept as
  #     written; every argument line except the last ends with `\` so the
  #     whole block reads as one single command invocation.
  #   * `${PORT}` is left as a placeholder -- presumably substituted by the
  #     launcher before the command is run; verify against the consumer.
  #   * `ttl` is presumably the idle time in seconds before the model is
  #     unloaded -- TODO confirm the unit.
  models:
    "qwen3-4b-thinking":
      name: "Qwen3 4B Thinking Q4.0"
      # NOTE(review): unlike the two entries below, this command quantizes
      # the V cache without passing `--flash-attn`; llama.cpp needs flash
      # attention for a quantized V cache, so confirm the build's default
      # enables it.
      cmd: |
        /app/llama-server \
        --model /models/Qwen3-4B-Thinking-2507-Q4_0.gguf \
        --host 0.0.0.0 \
        --port ${PORT} \
        --no-mmap \
        --cache-type-k q4_0 \
        --cache-type-v q4_0 \
        --cache-reuse 512 \
        --no-webui
      ttl: 5

    "gpt-oss-20b-thinking":
      # "16K ctx" in the name corresponds to `--ctx-size 16384` below.
      name: "GPT OSS 20B Thinking Q4.0 16K ctx"
      # `--ctx-size` and `--n-cpu-moe` previously had no trailing `\`, which
      # split this command into three separate lines; the continuations are
      # restored so all flags belong to the same invocation.
      cmd: |
        /app/llama-server \
        --model /models/gpt-oss-20b-Q4_0.gguf \
        --host 0.0.0.0 \
        --port ${PORT} \
        --cache-type-k q4_0 \
        --cache-type-v q4_0 \
        --flash-attn on \
        --cache-reuse 512 \
        --ctx-size 16384 \
        --n-gpu-layers 10 \
        --n-cpu-moe 14 \
        --no-webui
      ttl: 5

    "ministral-14b-thinking":
      name: "Ministral 3 14B Thinking"
      cmd: |
        /app/llama-server \
        --model /models/Ministral-3-14B-Reasoning-2512-Q4_0.gguf \
        --host 0.0.0.0 \
        --port ${PORT} \
        --cache-type-k q4_0 \
        --cache-type-v q4_0 \
        --flash-attn on \
        --cache-reuse 512 \
        --ctx-size 8192 \
        --n-gpu-layers 35 \
        --no-mmap \
        --parallel 1 \
        --no-webui
      # Much longer ttl than the other two entries (120 vs 5).
      ttl: 120
Advertisement
Add Comment
Please, Sign In to add comment