Log start
llama_model_loader: loaded meta data with 45 key-value pairs and 579 tensors from Qwen3-30B-A3B-Thinking-2507-Q4_K_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = qwen3moe
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Qwen3-30B-A3B-Thinking-2507
llama_model_loader: - kv 3: general.version str = 2507
llama_model_loader: - kv 4: general.finetune str = Thinking
llama_model_loader: - kv 5: general.basename str = Qwen3-30B-A3B-Thinking-2507
llama_model_loader: - kv 6: general.quantized_by str = Unsloth
llama_model_loader: - kv 7: general.size_label str = 30B-A3B
llama_model_loader: - kv 8: general.license str = apache-2.0
llama_model_loader: - kv 9: general.license.link str = https://huggingface.co/Qwen/Qwen3-30B...
llama_model_loader: - kv 10: general.repo_url str = https://huggingface.co/unsloth
llama_model_loader: - kv 11: general.base_model.count u32 = 1
llama_model_loader: - kv 12: general.base_model.0.name str = Qwen3 30B A3B Thinking 2507
llama_model_loader: - kv 13: general.base_model.0.version str = 2507
llama_model_loader: - kv 14: general.base_model.0.organization str = Qwen
llama_model_loader: - kv 15: general.base_model.0.repo_url str = https://huggingface.co/Qwen/Qwen3-30B...
llama_model_loader: - kv 16: general.tags arr[str,2] = ["unsloth", "text-generation"]
llama_model_loader: - kv 17: qwen3moe.block_count u32 = 48
llama_model_loader: - kv 18: qwen3moe.context_length u32 = 262144
llama_model_loader: - kv 19: qwen3moe.embedding_length u32 = 2048
llama_model_loader: - kv 20: qwen3moe.feed_forward_length u32 = 6144
llama_model_loader: - kv 21: qwen3moe.attention.head_count u32 = 32
llama_model_loader: - kv 22: qwen3moe.attention.head_count_kv u32 = 4
llama_model_loader: - kv 23: qwen3moe.rope.freq_base f32 = 10000000.000000
llama_model_loader: - kv 24: qwen3moe.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 25: qwen3moe.expert_used_count u32 = 8
llama_model_loader: - kv 26: qwen3moe.attention.key_length u32 = 128
llama_model_loader: - kv 27: qwen3moe.attention.value_length u32 = 128
llama_model_loader: - kv 28: qwen3moe.expert_count u32 = 128
llama_model_loader: - kv 29: qwen3moe.expert_feed_forward_length u32 = 768
llama_model_loader: - kv 30: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 31: tokenizer.ggml.pre str = qwen2
llama_model_loader: - kv 32: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 33: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 34: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv 35: tokenizer.ggml.eos_token_id u32 = 151645
llama_model_loader: - kv 36: tokenizer.ggml.padding_token_id u32 = 151654
llama_model_loader: - kv 37: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 38: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
llama_model_loader: - kv 39: general.quantization_version u32 = 2
llama_model_loader: - kv 40: general.file_type u32 = 14
llama_model_loader: - kv 41: quantize.imatrix.file str = Qwen3-30B-A3B-Thinking-2507-GGUF/imat...
llama_model_loader: - kv 42: quantize.imatrix.dataset str = unsloth_calibration_Qwen3-30B-A3B-Thi...
llama_model_loader: - kv 43: quantize.imatrix.entries_count u32 = 384
llama_model_loader: - kv 44: quantize.imatrix.chunks_count u32 = 684
llama_model_loader: - type f32: 241 tensors
llama_model_loader: - type q4_K: 327 tensors
llama_model_loader: - type q5_K: 10 tensors
llama_model_loader: - type q6_K: 1 tensors
llm_load_vocab: special tokens cache size = 26
llm_load_vocab: token to piece cache size = 0.9311 MB
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = qwen3moe
llm_load_print_meta: vocab type = BPE
llm_load_print_meta: n_vocab = 151936
llm_load_print_meta: n_merges = 151387
llm_load_print_meta: vocab_only = 0
llm_load_print_meta: n_ctx_train = 262144
llm_load_print_meta: n_embd = 2048
llm_load_print_meta: n_layer = 48
llm_load_print_meta: n_head = 32
llm_load_print_meta: n_head_kv = 4
llm_load_print_meta: n_rot = 128
llm_load_print_meta: n_swa = 0
llm_load_print_meta: n_swa_pattern = 1
llm_load_print_meta: n_embd_head_k = 128
llm_load_print_meta: n_embd_head_v = 128
llm_load_print_meta: n_gqa = 8
llm_load_print_meta: n_embd_k_gqa = 512
llm_load_print_meta: n_embd_v_gqa = 512
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-06
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 0.0e+00
llm_load_print_meta: n_ff = 6144
llm_load_print_meta: n_expert = 128
llm_load_print_meta: n_expert_used = 8
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 10000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_ctx_orig_yarn = 262144
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 0
llm_load_print_meta: ssm_d_inner = 0
llm_load_print_meta: ssm_d_state = 0
llm_load_print_meta: ssm_dt_rank = 0
llm_load_print_meta: model type = ?B
llm_load_print_meta: model ftype = Q4_K - Small
llm_load_print_meta: model params = 30.532 B
llm_load_print_meta: model size = 16.252 GiB (4.572 BPW)
llm_load_print_meta: repeating layers = 15.851 GiB (4.552 BPW, 29.910 B parameters)
llm_load_print_meta: general.name = Qwen3-30B-A3B-Thinking-2507
llm_load_print_meta: BOS token = 11 ','
llm_load_print_meta: EOS token = 151645 '<|im_end|>'
llm_load_print_meta: PAD token = 151654 '<|vision_pad|>'
llm_load_print_meta: LF token = 148848 'ÄĬ'
llm_load_print_meta: EOT token = 151645 '<|im_end|>'
llm_load_print_meta: max token length = 256
llm_load_print_meta: n_ff_exp = 768
llm_load_tensors: ggml ctx size = 0.25 MiB
llm_load_tensors: CPU buffer size = 16641.65 MiB
....................................................................................................
llama_new_context_with_model: n_ctx = 32768
llama_new_context_with_model: n_batch = 2048
llama_new_context_with_model: n_ubatch = 512
llama_new_context_with_model: flash_attn = 0
llama_new_context_with_model: mla_attn = 0
llama_new_context_with_model: attn_max_b = 0
llama_new_context_with_model: fused_moe = 0
llama_new_context_with_model: ser = -1, 0
llama_new_context_with_model: freq_base = 10000000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CPU KV buffer size = 3072.00 MiB
llama_new_context_with_model: KV self size = 3072.00 MiB, K (f16): 1536.00 MiB, V (f16): 1536.00 MiB
llama_new_context_with_model: CPU output buffer size = 0.58 MiB
llama_new_context_with_model: CPU compute buffer size = 2136.01 MiB
llama_new_context_with_model: graph nodes = 2165
llama_new_context_with_model: graph splits = 578
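
A quick arithmetic cross-check of the target-model figures above (a minimal sketch; the f16 KV-cache formula of 2 bytes per element, K and V each sized n_ctx * n_layer * n_embd_k_gqa, is an assumption based on typical llama.cpp behaviour, not something the log states). The 16641.65 MiB CPU weight buffer is simply the 16.252 GiB model size expressed in MiB.

    # Python sanity check against the numbers printed above
    n_ctx, n_layer, n_embd_k_gqa = 32768, 48, 512             # from the log
    kv_mib = 2 * n_ctx * n_layer * n_embd_k_gqa * 2 / 2**20    # K + V, f16 (2 bytes/elem)
    print(kv_mib)                                              # 3072.0 -> "KV self size = 3072.00 MiB"

    size_gib, params = 16.252, 30.532e9                        # from the log
    print(size_gib * 2**30 * 8 / params)                       # ~4.572 -> "(4.572 BPW)"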
llama_model_loader: loaded meta data with 32 key-value pairs and 310 tensors from Qwen3-0.6B-UD-Q5_K_XL.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = qwen3
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Qwen3-0.6B
llama_model_loader: - kv 3: general.basename str = Qwen3-0.6B
llama_model_loader: - kv 4: general.quantized_by str = Unsloth
llama_model_loader: - kv 5: general.size_label str = 0.6B
llama_model_loader: - kv 6: general.repo_url str = https://huggingface.co/unsloth
llama_model_loader: - kv 7: qwen3.block_count u32 = 28
llama_model_loader: - kv 8: qwen3.context_length u32 = 40960
llama_model_loader: - kv 9: qwen3.embedding_length u32 = 1024
llama_model_loader: - kv 10: qwen3.feed_forward_length u32 = 3072
llama_model_loader: - kv 11: qwen3.attention.head_count u32 = 16
llama_model_loader: - kv 12: qwen3.attention.head_count_kv u32 = 8
llama_model_loader: - kv 13: qwen3.rope.freq_base f32 = 1000000.000000
llama_model_loader: - kv 14: qwen3.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 15: qwen3.attention.key_length u32 = 128
llama_model_loader: - kv 16: qwen3.attention.value_length u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 18: tokenizer.ggml.pre str = qwen2
llama_model_loader: - kv 19: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 20: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 21: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv 22: tokenizer.ggml.eos_token_id u32 = 151645
llama_model_loader: - kv 23: tokenizer.ggml.padding_token_id u32 = 151654
llama_model_loader: - kv 24: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 25: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
llama_model_loader: - kv 26: general.quantization_version u32 = 2
llama_model_loader: - kv 27: general.file_type u32 = 17
llama_model_loader: - kv 28: quantize.imatrix.file str = Qwen3-0.6B-GGUF/imatrix_unsloth.dat
llama_model_loader: - kv 29: quantize.imatrix.dataset str = unsloth_calibration_Qwen3-0.6B.txt
llama_model_loader: - kv 30: quantize.imatrix.entries_count u32 = 196
llama_model_loader: - kv 31: quantize.imatrix.chunks_count u32 = 688
llama_model_loader: - type f32: 113 tensors
llama_model_loader: - type q8_0: 1 tensors
llama_model_loader: - type q4_K: 20 tensors
llama_model_loader: - type q5_K: 120 tensors
llama_model_loader: - type q6_K: 56 tensors
llm_load_vocab: special tokens cache size = 26
llm_load_vocab: token to piece cache size = 0.9311 MB
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = qwen3
llm_load_print_meta: vocab type = BPE
llm_load_print_meta: n_vocab = 151936
llm_load_print_meta: n_merges = 151387
llm_load_print_meta: vocab_only = 0
llm_load_print_meta: n_ctx_train = 40960
llm_load_print_meta: n_embd = 1024
llm_load_print_meta: n_layer = 28
llm_load_print_meta: n_head = 16
llm_load_print_meta: n_head_kv = 8
llm_load_print_meta: n_rot = 128
llm_load_print_meta: n_swa = 0
llm_load_print_meta: n_swa_pattern = 1
llm_load_print_meta: n_embd_head_k = 128
llm_load_print_meta: n_embd_head_v = 128
llm_load_print_meta: n_gqa = 2
llm_load_print_meta: n_embd_k_gqa = 1024
llm_load_print_meta: n_embd_v_gqa = 1024
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-06
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 0.0e+00
llm_load_print_meta: n_ff = 3072
llm_load_print_meta: n_expert = 0
llm_load_print_meta: n_expert_used = 0
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 1000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_ctx_orig_yarn = 40960
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 0
llm_load_print_meta: ssm_d_inner = 0
llm_load_print_meta: ssm_d_state = 0
llm_load_print_meta: ssm_dt_rank = 0
llm_load_print_meta: model type = ?B
llm_load_print_meta: model ftype = Q5_K - Medium
llm_load_print_meta: model params = 596.050 M
llm_load_print_meta: model size = 420.026 MiB (5.911 BPW)
llm_load_print_meta: general.name = Qwen3-0.6B
llm_load_print_meta: BOS token = 11 ','
llm_load_print_meta: EOS token = 151645 '<|im_end|>'
llm_load_print_meta: PAD token = 151654 '<|vision_pad|>'
llm_load_print_meta: LF token = 148848 'ÄĬ'
llm_load_print_meta: EOT token = 151645 '<|im_end|>'
llm_load_print_meta: max token length = 256
llm_load_tensors: ggml ctx size = 0.14 MiB
llm_load_tensors: CPU buffer size = 420.03 MiB
..........................................................
llama_new_context_with_model: n_ctx = 32768
llama_new_context_with_model: n_batch = 2048
llama_new_context_with_model: n_ubatch = 512
llama_new_context_with_model: flash_attn = 0
llama_new_context_with_model: mla_attn = 0
llama_new_context_with_model: attn_max_b = 0
llama_new_context_with_model: fused_moe = 0
llama_new_context_with_model: ser = -1, 0
llama_new_context_with_model: freq_base = 1000000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CPU KV buffer size = 3584.00 MiB
llama_new_context_with_model: KV self size = 3584.00 MiB, K (f16): 1792.00 MiB, V (f16): 1792.00 MiB
llama_new_context_with_model: CPU output buffer size = 0.58 MiB
llama_new_context_with_model: CPU compute buffer size = 1100.01 MiB
llama_new_context_with_model: graph nodes = 873
llama_new_context_with_model: graph splits = 394
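
The same check for the 0.6B draft context (same assumed f16 KV layout as above). Note that at 32768 context the draft's KV cache (3584 MiB) is larger than the 30B target's (3072 MiB), because the draft keeps n_embd_k_gqa = 1024 per layer over 28 layers versus 512 over 48 layers for the target.

    n_ctx, n_layer, n_embd_k_gqa = 32768, 28, 1024            # draft model, from the log
    print(2 * n_ctx * n_layer * n_embd_k_gqa * 2 / 2**20)      # 3584.0 -> "KV self size = 3584.00 MiB"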


/build/source/src/llama.cpp:18273: GGML_ASSERT(n_tokens_all <= cparams.n_batch) failed
/nix/store/ila9g3xmkicpfgpvyx9db2cwv23ng9ni-llama-cpp-blas-0.0.0/lib/libggml.so(+0x1e4eb)[0x7f58f561e4eb]
/nix/store/ila9g3xmkicpfgpvyx9db2cwv23ng9ni-llama-cpp-blas-0.0.0/lib/libggml.so(ggml_abort+0x15f)[0x7f58f562010f]
/nix/store/ila9g3xmkicpfgpvyx9db2cwv23ng9ni-llama-cpp-blas-0.0.0/lib/libllama.so(llama_decode+0x1976)[0x7f58f6185ab6]
llama-speculative[0x41cc7c]
/nix/store/0wydilnf1c9vznywsvxqnaing4wraaxp-glibc-2.39-52/lib/libc.so.6(+0x2a14e)[0x7f58f503314e]
/nix/store/0wydilnf1c9vznywsvxqnaing4wraaxp-glibc-2.39-52/lib/libc.so.6(__libc_start_main+0x89)[0x7f58f5033209]
llama-speculative[0x421515]
Aborted (core dumped) llama-speculative -m Qwen3-30B-A3B-Thinking-2507-Q4_K_S.gguf -md Qwen3-0.6B-UD-Q5_K_XL.gguf -c 32768
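
The abort comes from llama_decode: GGML_ASSERT(n_tokens_all <= cparams.n_batch) fires when a single llama_decode call receives more tokens than the logical batch size, and both contexts above were created with the default n_batch = 2048 (only -c 32768 was passed on the command line). A plausible trigger is llama-speculative submitting a prompt longer than 2048 tokens in one call. If that is the cause, raising the logical batch to the context size may avoid the assert; the re-run below is a hedged suggestion and assumes this build accepts the standard -b/--batch-size option:

    llama-speculative -m Qwen3-30B-A3B-Thinking-2507-Q4_K_S.gguf -md Qwen3-0.6B-UD-Q5_K_XL.gguf -c 32768 -b 32768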