ik_llama.cpp llama-server timings: DeepSeek-R1-UD-IQ1_S with vs. without -mla (-c 2048, -ngl 36)

ubuntuai@ubuntuai ~/ik_llama.cpp (main)> ./build/bin/llama-server -m ~/models/DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf -c 2048 --host 0.0.0.0 --port 5000 -a DeepSeek-R1 -ngl 36 -mla

INFO [           print_timings] prompt eval time     =     842.49 ms /     8 tokens (  105.31 ms per token,     9.50 tokens per second) | tid="127215082766336" timestamp=1739349739 id_slot=0 id_task=0 t_prompt_processing=842.493 n_prompt_tokens_processed=8 t_token=105.311625 n_tokens_second=9.495627856848662
INFO [           print_timings] generation eval time =   43609.12 ms /   309 runs   (  141.13 ms per token,     7.09 tokens per second) | tid="127215082766336" timestamp=1739349739 id_slot=0 id_task=0 t_token_generation=43609.119 n_decoded=309 t_token=141.1298349514563 n_tokens_second=7.0856739848378965


ubuntuai@ubuntuai ~/ik_llama.cpp (main)> ./build/bin/llama-server -m ~/models/DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf -c 2048 --host 0.0.0.0 --port 5000 -a DeepSeek-R1 -ngl 36
INFO [           print_timings] prompt eval time     =     809.67 ms /     8 tokens (  101.21 ms per token,     9.88 tokens per second) | tid="137948583522304" timestamp=1739347778 id_slot=0 id_task=0 t_prompt_processing=809.667 n_prompt_tokens_processed=8 t_token=101.208375 n_tokens_second=9.88060523647376
INFO [           print_timings] generation eval time =   43648.61 ms /   309 runs   (  141.26 ms per token,     7.08 tokens per second) | tid="137948583522304" timestamp=1739347778 id_slot=0 id_task=0 t_token_generation=43648.614 n_decoded=309 t_token=141.2576504854369 n_tokens_second=7.079262585519897