run-qwen235: launch script for llama-server serving Qwen3-235B-A22B (IQ4_XS)

kevin@kevin-lub-llm:~/ai$ cat run-qwen235
#!/bin/bash
#
# run-qwen235 -- (re)start llama-server serving Qwen3-235B-A22B (IQ4_XS).
#
# Steps:
#   1. Stop any process whose command line matches "llama-server".
#   2. Ask gguf-tensor-overrider for the tensor-override flags for this model.
#   3. Launch llama-server on 0.0.0.0:1337 with those flags appended.
#
# Requires: pkill, gguf-tensor-overrider on PATH, and the llama-server binary
# at the path configured below.
# Exit status: 1 if the overrides cannot be generated, otherwise the exit
# status of llama-server.

set -euo pipefail

readonly LLAMA_SERVER=/home/kevin/llama.cpp/build/bin/llama-server
readonly MODEL_HF_REPO=unsloth/Qwen3-235B-A22B-GGUF:IQ4_XS
readonly MODEL_GGUF_URL=https://huggingface.co/unsloth/Qwen3-235B-A22B-GGUF/resolve/main/IQ4_XS/Qwen3-235B-A22B-IQ4_XS-00001-of-00003.gguf

# NOTE(review): the overrides are computed for a 32000-token context, but the
# server is started with a 16000-token context. Kept as-is; confirm the
# difference is intentional (e.g. deliberate headroom) or align the two.
readonly OVERRIDER_CTX=32000
readonly SERVER_CTX=16000

# Presumably one share per GPU, in CUDA_VISIBLE_DEVICES order -- confirm
# against the gguf-tensor-overrider documentation.
readonly GPU_PERCENTAGES=0.85,0.99,0.95,0.95

readonly SERVER_PORT=1337
readonly SERVER_HOST=0.0.0.0

# Print an error to stderr and abort.
die() {
  printf 'run-qwen235: %s\n' "$*" >&2
  exit 1
}

# Stop a previous instance. pkill exits 0 only when it signalled at least one
# process, so the grace period is skipped when nothing was running.
if pkill -f "llama-server"; then
  sleep 10
fi

export CUDA_VISIBLE_DEVICES=2,0,1,3

# Generate tensor overrides. Abort instead of silently starting the server
# without them when the tool fails or prints nothing.
TENSOR_OVERRIDES=$(
  gguf-tensor-overrider \
    -g "$MODEL_GGUF_URL" \
    -c "$OVERRIDER_CTX" \
    --granular-gpu-percentage="$GPU_PERCENTAGES"
) || die "gguf-tensor-overrider failed; not starting llama-server"
[[ -n "$TENSOR_OVERRIDES" ]] \
  || die "gguf-tensor-overrider produced no output; not starting llama-server"

# Build command with tensor overrides.
CMD="$LLAMA_SERVER -hf $MODEL_HF_REPO -c $SERVER_CTX -fa -sm row --port $SERVER_PORT --host $SERVER_HOST $TENSOR_OVERRIDES"

# Execute command directly.
# NOTE(review): eval is kept so the overrider's output is parsed by the shell
# exactly as before (it may rely on shell quoting -- not verifiable from this
# script). This means the tool's output is executed as shell code; only use it
# with a trusted gguf-tensor-overrider build.
eval "$CMD"