Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- kevin@kevin-lub-llm:~/ai$ cat run-qwen235
#!/bin/bash
# run-qwen235: (re)start llama-server serving unsloth/Qwen3-235B-A22B-GGUF
# (IQ4_XS) with per-tensor device overrides from gguf-tensor-overrider.
#
# Steps: stop any running llama-server, set the CUDA device order, generate
# the tensor overrides, then launch the server on 0.0.0.0:1337.

# Stop a previous instance. pkill exits non-zero when nothing matched, so its
# status is deliberately ignored. The pause is presumably there to let the old
# process exit and free its GPU memory before the new one starts.
pkill -f "llama-server"; sleep 10;

# Expose the GPUs to CUDA in this order (visible device 0 is physical GPU 2).
export CUDA_VISIBLE_DEVICES=2,0,1,3

# Generate tensor overrides.
# NOTE(review): the overrider is sized with -c 32000 while the server below
# runs with -c 16000 -- confirm the mismatch is intentional headroom.
# NOTE(review): the four percentages presumably map one-to-one onto the four
# visible GPUs in CUDA_VISIBLE_DEVICES order -- confirm.
# The exit status is checked so that a failed generation (tool missing,
# download error, ...) aborts here instead of silently launching the server
# with no overrides at all.
if ! TENSOR_OVERRIDES=$(
  gguf-tensor-overrider \
    -g "https://huggingface.co/unsloth/Qwen3-235B-A22B-GGUF/resolve/main/IQ4_XS/Qwen3-235B-A22B-IQ4_XS-00001-of-00003.gguf" \
    -c 32000 \
    --granular-gpu-percentage=0.85,0.99,0.95,0.95
); then
  printf '%s\n' "run-qwen235: gguf-tensor-overrider failed; not starting llama-server" >&2
  exit 1
fi

# Build command with tensor overrides
CMD="/home/kevin/llama.cpp/build/bin/llama-server -hf unsloth/Qwen3-235B-A22B-GGUF:IQ4_XS -c 16000 -fa -sm row --port 1337 --host 0.0.0.0 $TENSOR_OVERRIDES"

# Execute command directly. eval re-parses the whole string, so any quoting
# inside the generated overrides is honored by the shell.
# NOTE(review): this trusts the overrider's output as shell text -- only run
# it against a generator you control.
eval "$CMD"
Advertisement
Add Comment
Please, Sign In to add comment