Untitled

time=2025-10-22T19:31:51.503-07:00 level=INFO source=routes.go:1475 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/home/ldupin/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]"
time=2025-10-22T19:31:51.505-07:00 level=INFO source=images.go:518 msg="total blobs: 66"
time=2025-10-22T19:31:51.506-07:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-10-22T19:31:51.507-07:00 level=INFO source=routes.go:1528 msg="Listening on 127.0.0.1:11434 (version 0.12.3)"
time=2025-10-22T19:31:51.507-07:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-10-22T19:31:51.541-07:00 level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
time=2025-10-22T19:31:51.543-07:00 level=INFO source=amd_linux.go:390 msg="amdgpu is supported" gpu=GPU-46860173bd5bfd44 gpu_type=gfx1100
time=2025-10-22T19:31:51.543-07:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-46860173bd5bfd44 library=rocm variant="" compute=gfx1100 driver=0.0 name=1002:744c total="20.0 GiB" available="17.7 GiB"

time=2025-10-22T19:31:51.543-07:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="20.0 GiB" threshold="20.0 GiB"

time=2025-10-22T19:31:59.841-07:00 level=INFO source=server.go:200 msg="model wants flash attention"
time=2025-10-22T19:31:59.841-07:00 level=INFO source=server.go:217 msg="enabling flash attention"
time=2025-10-22T19:31:59.842-07:00 level=INFO source=server.go:399 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --model /home/ldupin/.ollama/models/blobs/sha256-b112e727c6f18875636c56a779790a590d705aec9e1c0eb5a97d51fc2a778583 --port 35771"
time=2025-10-22T19:31:59.842-07:00 level=INFO source=server.go:672 msg="loading model" "model layers"=25 requested=999
time=2025-10-22T19:31:59.842-07:00 level=INFO source=server.go:678 msg="system memory" total="62.7 GiB" free="57.5 GiB" free_swap="32.0 GiB"
time=2025-10-22T19:31:59.842-07:00 level=INFO source=server.go:686 msg="gpu memory" id=GPU-46860173bd5bfd44 available="17.3 GiB" free="17.8 GiB" minimum="457.0 MiB" overhead="0 B"
time=2025-10-22T19:31:59.852-07:00 level=INFO source=runner.go:1252 msg="starting ollama engine"
time=2025-10-22T19:31:59.852-07:00 level=INFO source=runner.go:1287 msg="Server listening on 127.0.0.1:35771"
time=2025-10-22T19:31:59.854-07:00 level=INFO source=runner.go:1171 msg=load request="{Operation:fit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:true KvSize:4096 KvCacheType: NumThreads:32 GPULayers:25[ID:GPU-46860173bd5bfd44 Layers:25(0..24)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-22T19:31:59.896-07:00 level=INFO source=ggml.go:131 msg="" architecture=gptoss file_type=MXFP4 name="" description="" num_tensors=315 num_key_values=30
load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-haswell.so
time=2025-10-22T19:31:59.907-07:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
time=2025-10-22T19:31:59.911-07:00 level=INFO source=runner.go:1171 msg=load request="{Operation:fit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:true KvSize:4096 KvCacheType: NumThreads:32 GPULayers:[] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-22T19:31:59.958-07:00 level=INFO source=runner.go:1171 msg=load request="{Operation:alloc LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:true KvSize:4096 KvCacheType: NumThreads:32 GPULayers:[] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=runner.go:1171 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:true KvSize:4096 KvCacheType: NumThreads:32 GPULayers:[] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=ggml.go:498 msg="offloaded 0/25 layers to GPU"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=backend.go:315 msg="model weights" device=CPU size="12.8 GiB"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=backend.go:326 msg="kv cache" device=CPU size="204.0 MiB"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=backend.go:337 msg="compute graph" device=CPU size="109.2 MiB"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=backend.go:342 msg="total memory" size="13.1 GiB"
time=2025-10-22T19:32:00.052-07:00 level=INFO source=sched.go:470 msg="loaded runners" count=1
time=2025-10-22T19:32:00.052-07:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-22T19:32:00.054-07:00 level=INFO source=server.go:1285 msg="waiting for server to become available" status="llm server loading model"
time=2025-10-22T19:32:02.588-07:00 level=INFO source=server.go:1289 msg="llama runner started in 2.75 seconds"