Guest User

Untitled

a guest
Jul 20th, 2025
11
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.61 KB | None | 0 0
  1. kevin@kevin-lub-llm:~/ai$ cat run-qwen235
  2. #!/bin/bash
  3.  
  4. pkill -f "llama-server"; sleep 10;
  5.  
  6. export CUDA_VISIBLE_DEVICES=2,0,1,3
  7.  
  8. # Generate tensor overrides
  9. TENSOR_OVERRIDES=$(gguf-tensor-overrider -g https://huggingface.co/unsloth/Qwen3-235B-A22B-GGUF/resolve/main/IQ4_XS/Qwen3-235B-A22B-IQ4_XS-00001-of-00003.gguf -c 32000 --granular-gpu-percentage=0.85,0.99,0.95,0.95)
  10.  
  11. # Build command with tensor overrides
  12. CMD="/home/kevin/llama.cpp/build/bin/llama-server -hf unsloth/Qwen3-235B-A22B-GGUF:IQ4_XS -c 16000 -fa -sm row --port 1337 --host 0.0.0.0 $TENSOR_OVERRIDES"
  13.  
  14. # Execute command directly
  15. eval "$CMD"
  16.  
  17.  
  18.  
Advertisement
Add Comment
Please, Sign In to add comment