Untitled

@echo off
rem ---------------------------------------------------------------------------
rem Start llama-server.exe with the DeepSeek-V3-0324 IQ1_S_R4 GGUF model
rem (first shard of a 3-part file, per the file name) using CUDA devices 0
rem and 1.
rem
rem cmd.exe cannot take comments between ^-continued lines, so the arguments
rem are described here instead of inline.
rem NOTE(review): -amb, -mla, -fmoe and -rtr are not mainline llama.cpp
rem options; they look like ik_llama.cpp extensions. Confirm every meaning
rem below against "llama-server.exe --help" for the build actually in use.
rem
rem   --n-gpu-layers 999      ask for all layers to be offloaded to GPU
rem   -ts 23,23               equal tensor-split ratio across the two GPUs
rem   --threads / --threads-batch 18
rem                           CPU threads for generation / batch processing
rem   --ctx-size 16384        context window, in tokens
rem   --batch-size / --ubatch-size 4096
rem                           logical / physical batch size
rem   --no-mmap               load the model into memory instead of mmap
rem   -amb 512, -mla 3, -fa, -fmoe, -rtr
rem                           presumably attention/MoE tuning switches
rem   -ot "<regex>=<buffer>"  place tensors whose name matches <regex> in
rem                           <buffer>: "exps" tensors of blocks 0-8 go to
rem                           CUDA0, blocks 9-14 to CUDA1, every other "exps"
rem                           tensor to CPU. This relies on the rules being
rem                           tried in order with the first match winning
rem                           (TODO confirm), so keep "exps=CPU" last.
rem ---------------------------------------------------------------------------

rem setlocal keeps CUDA_VISIBLE_DEVICES from leaking into the calling console
rem session; the quoted form keeps stray trailing spaces out of the value.
setlocal
set "CUDA_VISIBLE_DEVICES=0,1"

llama-server.exe ^
-m "T:\models\DeepSeek-V3-0324-IQ1_S_R4-00001-of-00003.gguf" ^
--n-gpu-layers 999 ^
-ts 23,23 ^
--threads 18 ^
--threads-batch 18 ^
--ctx-size 16384 ^
--batch-size 4096 ^
--ubatch-size 4096 ^
--no-mmap ^
-amb 512 ^
-mla 3 ^
-fa ^
-fmoe ^
-rtr ^
-ot "blk\.(0|1|2|3|4|5|6|7|8)\..*exps=CUDA0" ^
-ot "blk\.(9|10|11|12|13|14)\..*exps=CUDA1" ^
-ot "exps=CPU"