Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
@echo off
rem ---------------------------------------------------------------------------
rem Starts llama-server.exe with the DeepSeek-V3-0324 IQ1_S_R4 GGUF model.
rem The model path names the first of three shards, per its file name.
rem
rem Tensor placement, as written in the three -ot overrides below:
rem   * tensors matching "exps" in blocks 0 through 8   go to CUDA0
rem   * tensors matching "exps" in blocks 9 through 14  go to CUDA1
rem   * every other tensor matching "exps"              goes to CPU
rem NOTE(review): the catch-all CPU rule is listed last on purpose; this
rem presumably relies on the first matching override winning - confirm
rem against the llama-server build in use before reordering.
rem
rem Other settings: all layers requested on GPU with an even 23,23 tensor
rem split, 18 threads for both generation and batch work, a 16384 token
rem context, batch and micro-batch size 4096, memory mapping disabled, and
rem flash attention enabled.
rem NOTE(review): -amb, -mla, -fmoe and -rtr are not mainline llama.cpp
rem options; they look like ik_llama.cpp fork options. Check the output of
rem "llama-server.exe --help" for their exact meaning before changing them.
rem
rem No comments can be placed between the continued lines of the command,
rem because a caret at end of line joins the next line into the same command.
rem ---------------------------------------------------------------------------

rem Keep the environment change local to this script so it does not leak
rem into the cmd session that invoked it.
setlocal

rem Make only GPU 0 and GPU 1 visible to CUDA. The quoted form prevents any
rem trailing whitespace from becoming part of the value.
set "CUDA_VISIBLE_DEVICES=0,1"

llama-server.exe ^
  -m "T:\models\DeepSeek-V3-0324-IQ1_S_R4-00001-of-00003.gguf" ^
  --n-gpu-layers 999 ^
  -ts 23,23 ^
  --threads 18 ^
  --threads-batch 18 ^
  --ctx-size 16384 ^
  --batch-size 4096 ^
  --ubatch-size 4096 ^
  --no-mmap ^
  -amb 512 ^
  -mla 3 ^
  -fa ^
  -fmoe ^
  -rtr ^
  -ot "blk\.(0|1|2|3|4|5|6|7|8)\..*exps=CUDA0" ^
  -ot "blk\.(9|10|11|12|13|14)\..*exps=CUDA1" ^
  -ot "exps=CPU"
Advertisement
Add Comment
Please, Sign In to add comment