Lissanro

DeepSeek-V3-0324-GGUF-UD-Q4_K_XL to q4_k_r4 conversion log

Apr 11th, 2025
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 135.55 KB | None | 0 0
  1. > ~/pkgs/ik_llama.cpp/build/bin/llama-quantize --repack --repack-pattern exps /home/lissanro/pkgs/text-generation-webui/models/DeepSeek-V3-0324-GGUF-UD-Q4_K_XL/DeepSeek-V3-0324-UD-Q4_K_XL-00001-of-00009.gguf /mnt/secondary/tmp/DeepSeek-V3-0324-GGUF-UD-Q4_K_R4.gguf q4_k_r4
  2. main: build = 3630 (5f44f4b3)
  3. main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
  4. main: quantizing '/home/lissanro/pkgs/text-generation-webui/models/DeepSeek-V3-0324-GGUF-UD-Q4_K_XL/DeepSeek-V3-0324-UD-Q4_K_XL-00001-of-00009.gguf' to '/mnt/secondary/tmp/DeepSeek-V3-0324-GGUF-UD-Q4_K_R4.gguf' as Q4_K_R4
  5. llama_model_loader: additional 8 GGUFs metadata loaded.
  6. llama_model_loader: loaded meta data with 49 key-value pairs and 1025 tensors from /home/lissanro/pkgs/text-generation-webui/models/DeepSeek-V3-0324-GGUF-UD-Q4_K_XL/DeepSeek-V3-0324-UD-Q4_K_XL-00001-of-00009.gguf (version GGUF V3 (latest))
  7. llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
  8. llama_model_loader: - kv 0: general.architecture str = deepseek2
  9. llama_model_loader: - kv 1: general.type str = model
  10. llama_model_loader: - kv 2: general.name str = DeepSeek V3 0324 BF16
  11. llama_model_loader: - kv 3: general.quantized_by str = Unsloth
  12. llama_model_loader: - kv 4: general.size_label str = 256x20B
  13. llama_model_loader: - kv 5: general.license str = mit
  14. llama_model_loader: - kv 6: general.repo_url str = https://huggingface.co/unsloth
  15. llama_model_loader: - kv 7: deepseek2.block_count u32 = 61
  16. llama_model_loader: - kv 8: deepseek2.context_length u32 = 163840
  17. llama_model_loader: - kv 9: deepseek2.embedding_length u32 = 7168
  18. llama_model_loader: - kv 10: deepseek2.feed_forward_length u32 = 18432
  19. llama_model_loader: - kv 11: deepseek2.attention.head_count u32 = 128
  20. llama_model_loader: - kv 12: deepseek2.attention.head_count_kv u32 = 128
  21. llama_model_loader: - kv 13: deepseek2.rope.freq_base f32 = 10000.000000
  22. llama_model_loader: - kv 14: deepseek2.attention.layer_norm_rms_epsilon f32 = 0.000001
  23. llama_model_loader: - kv 15: deepseek2.expert_used_count u32 = 8
  24. llama_model_loader: - kv 16: deepseek2.leading_dense_block_count u32 = 3
  25. llama_model_loader: - kv 17: deepseek2.vocab_size u32 = 129280
  26. llama_model_loader: - kv 18: deepseek2.attention.q_lora_rank u32 = 1536
  27. llama_model_loader: - kv 19: deepseek2.attention.kv_lora_rank u32 = 512
  28. llama_model_loader: - kv 20: deepseek2.attention.key_length u32 = 192
  29. llama_model_loader: - kv 21: deepseek2.attention.value_length u32 = 128
  30. llama_model_loader: - kv 22: deepseek2.expert_feed_forward_length u32 = 2048
  31. llama_model_loader: - kv 23: deepseek2.expert_count u32 = 256
  32. llama_model_loader: - kv 24: deepseek2.expert_shared_count u32 = 1
  33. llama_model_loader: - kv 25: deepseek2.expert_weights_scale f32 = 2.500000
  34. llama_model_loader: - kv 26: deepseek2.expert_weights_norm bool = true
  35. llama_model_loader: - kv 27: deepseek2.expert_gating_func u32 = 2
  36. llama_model_loader: - kv 28: deepseek2.rope.dimension_count u32 = 64
  37. llama_model_loader: - kv 29: deepseek2.rope.scaling.type str = yarn
  38. llama_model_loader: - kv 30: deepseek2.rope.scaling.factor f32 = 40.000000
  39. llama_model_loader: - kv 31: deepseek2.rope.scaling.original_context_length u32 = 4096
  40. llama_model_loader: - kv 32: deepseek2.rope.scaling.yarn_log_multiplier f32 = 0.100000
  41. llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
  42. llama_model_loader: - kv 34: tokenizer.ggml.pre str = deepseek-v3
  43. llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,129280] = ["<|begin▁of▁sentence|>", "<�...
  44. llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,129280] = [3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
  45. llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,127741] = ["Ġ t", "Ġ a", "i n", "Ġ Ġ", "h e...
  46. llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 0
  47. llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 1
  48. llama_model_loader: - kv 40: tokenizer.ggml.padding_token_id u32 = 1
  49. llama_model_loader: - kv 41: tokenizer.ggml.add_bos_token bool = true
  50. llama_model_loader: - kv 42: tokenizer.ggml.add_eos_token bool = false
  51. llama_model_loader: - kv 43: tokenizer.chat_template str = {% if not add_generation_prompt is de...
  52. llama_model_loader: - kv 44: general.quantization_version u32 = 2
  53. llama_model_loader: - kv 45: general.file_type u32 = 15
  54. llama_model_loader: - kv 46: split.no u16 = 0
  55. llama_model_loader: - kv 47: split.tensors.count i32 = 1025
  56. llama_model_loader: - kv 48: split.count u16 = 9
  57. llama_model_loader: - type f32: 361 tensors
  58. llama_model_loader: - type q4_K: 453 tensors
  59. llama_model_loader: - type q6_K: 211 tensors
  60. ===================== Model ftype: Q4_K - Medium: Repacked ftype: Q4_K_R4
  61. [ 1/1025] output.weight - [ 7168, 129280, 1, 1], type = q6_K, size = 724.951 MB, type = q6_K
  62. [ 2/1025] output_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  63. [ 3/1025] token_embd.weight - [ 7168, 129280, 1, 1], type = q4_K, size = 497.109 MB, type = q4_K
  64. [ 4/1025] blk.0.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  65. [ 5/1025] blk.0.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  66. [ 6/1025] blk.0.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  67. [ 7/1025] blk.0.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  68. [ 8/1025] blk.0.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  69. [ 9/1025] blk.0.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  70. [ 10/1025] blk.0.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  71. [ 11/1025] blk.0.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  72. [ 12/1025] blk.0.ffn_down.weight - [18432, 7168, 1, 1], type = q6_K, size = 103.359 MB, type = q6_K
  73. [ 13/1025] blk.0.ffn_gate.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
  74. [ 14/1025] blk.0.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  75. [ 15/1025] blk.0.ffn_up.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
  76. [ 16/1025] blk.1.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  77. [ 17/1025] blk.1.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  78. [ 18/1025] blk.1.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  79. [ 19/1025] blk.1.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  80. [ 20/1025] blk.1.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  81. [ 21/1025] blk.1.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  82. [ 22/1025] blk.1.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  83. [ 23/1025] blk.1.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  84. [ 24/1025] blk.1.ffn_down.weight - [18432, 7168, 1, 1], type = q6_K, size = 103.359 MB, type = q6_K
  85. [ 25/1025] blk.1.ffn_gate.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
  86. [ 26/1025] blk.1.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  87. [ 27/1025] blk.1.ffn_up.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
  88. [ 28/1025] blk.2.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  89. [ 29/1025] blk.2.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  90. [ 30/1025] blk.2.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  91. [ 31/1025] blk.2.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  92. [ 32/1025] blk.2.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  93. [ 33/1025] blk.2.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  94. [ 34/1025] blk.2.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  95. [ 35/1025] blk.2.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  96. [ 36/1025] blk.2.ffn_down.weight - [18432, 7168, 1, 1], type = q6_K, size = 103.359 MB, type = q6_K
  97. [ 37/1025] blk.2.ffn_gate.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
  98. [ 38/1025] blk.2.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  99. [ 39/1025] blk.2.ffn_up.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
  100. [ 40/1025] blk.3.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  101. [ 41/1025] blk.3.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  102. [ 42/1025] blk.3.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  103. [ 43/1025] blk.3.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  104. [ 44/1025] blk.3.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  105. [ 45/1025] blk.3.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  106. [ 46/1025] blk.3.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  107. [ 47/1025] blk.3.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  108. [ 48/1025] blk.3.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  109. [ 49/1025] blk.3.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  110. [ 50/1025] blk.3.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  111. [ 51/1025] blk.3.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  112. [ 52/1025] blk.3.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  113. [ 53/1025] blk.3.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  114. [ 54/1025] blk.3.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  115. [ 55/1025] blk.3.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  116. [ 56/1025] blk.3.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  117. [ 57/1025] blk.4.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  118. [ 58/1025] blk.4.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  119. [ 59/1025] blk.4.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  120. [ 60/1025] blk.4.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  121. [ 61/1025] blk.4.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  122. [ 62/1025] blk.4.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  123. [ 63/1025] blk.4.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  124. [ 64/1025] blk.4.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  125. [ 65/1025] blk.4.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  126. [ 66/1025] blk.4.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  127. [ 67/1025] blk.4.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  128. [ 68/1025] blk.4.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  129. [ 69/1025] blk.4.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  130. [ 70/1025] blk.4.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  131. [ 71/1025] blk.4.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  132. [ 72/1025] blk.4.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  133. [ 73/1025] blk.4.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  134. [ 74/1025] blk.5.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  135. [ 75/1025] blk.5.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  136. [ 76/1025] blk.5.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  137. [ 77/1025] blk.5.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  138. [ 78/1025] blk.5.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  139. [ 79/1025] blk.5.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  140. [ 80/1025] blk.5.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  141. [ 81/1025] blk.5.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  142. [ 82/1025] blk.5.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  143. [ 83/1025] blk.5.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  144. [ 84/1025] blk.5.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  145. [ 85/1025] blk.5.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  146. [ 86/1025] blk.5.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  147. [ 87/1025] blk.5.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  148. [ 88/1025] blk.5.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  149. [ 89/1025] blk.5.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  150. [ 90/1025] blk.5.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  151. [ 91/1025] blk.6.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  152. [ 92/1025] blk.6.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  153. [ 93/1025] blk.6.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  154. [ 94/1025] blk.6.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  155. [ 95/1025] blk.6.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  156. [ 96/1025] blk.6.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  157. [ 97/1025] blk.6.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  158. [ 98/1025] blk.6.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  159. [ 99/1025] blk.6.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  160. [ 100/1025] blk.6.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  161. [ 101/1025] blk.6.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  162. [ 102/1025] blk.6.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  163. [ 103/1025] blk.6.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  164. [ 104/1025] blk.6.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  165. [ 105/1025] blk.6.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  166. [ 106/1025] blk.6.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  167. [ 107/1025] blk.6.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  168. [ 108/1025] blk.7.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  169. [ 109/1025] blk.7.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  170. [ 110/1025] blk.7.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  171. [ 111/1025] blk.7.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  172. [ 112/1025] blk.7.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  173. [ 113/1025] blk.7.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  174. [ 114/1025] blk.7.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  175. [ 115/1025] blk.7.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  176. [ 116/1025] blk.7.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  177. [ 117/1025] blk.7.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  178. [ 118/1025] blk.7.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  179. [ 119/1025] blk.7.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  180. [ 120/1025] blk.7.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  181. [ 121/1025] blk.7.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  182. [ 122/1025] blk.7.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  183. [ 123/1025] blk.7.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  184. [ 124/1025] blk.7.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  185. [ 125/1025] blk.8.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  186. [ 126/1025] blk.8.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  187. [ 127/1025] blk.8.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  188. [ 128/1025] blk.8.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  189. [ 129/1025] blk.8.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  190. [ 130/1025] blk.8.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  191. [ 131/1025] blk.8.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  192. [ 132/1025] blk.8.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  193. [ 133/1025] blk.8.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  194. [ 134/1025] blk.8.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  195. [ 135/1025] blk.8.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  196. [ 136/1025] blk.8.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  197. [ 137/1025] blk.8.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  198. [ 138/1025] blk.8.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  199. [ 139/1025] blk.8.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  200. [ 140/1025] blk.8.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  201. [ 141/1025] blk.8.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  202. [ 142/1025] blk.9.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  203. [ 143/1025] blk.9.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  204. [ 144/1025] blk.9.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  205. [ 145/1025] blk.9.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  206. [ 146/1025] blk.9.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  207. [ 147/1025] blk.9.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  208. [ 148/1025] blk.9.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  209. [ 149/1025] blk.9.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  210. [ 150/1025] blk.9.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  211. [ 151/1025] blk.9.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  212. [ 152/1025] blk.9.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  213. [ 153/1025] blk.9.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  214. [ 154/1025] blk.9.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  215. [ 155/1025] blk.9.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  216. [ 156/1025] blk.9.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  217. [ 157/1025] blk.9.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  218. [ 158/1025] blk.9.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  219. [ 159/1025] blk.10.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  220. [ 160/1025] blk.10.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  221. [ 161/1025] blk.10.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  222. [ 162/1025] blk.10.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  223. [ 163/1025] blk.10.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  224. [ 164/1025] blk.10.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  225. [ 165/1025] blk.10.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  226. [ 166/1025] blk.10.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  227. [ 167/1025] blk.10.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  228. [ 168/1025] blk.10.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  229. [ 169/1025] blk.10.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  230. [ 170/1025] blk.10.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  231. [ 171/1025] blk.10.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  232. [ 172/1025] blk.10.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  233. [ 173/1025] blk.10.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  234. [ 174/1025] blk.10.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  235. [ 175/1025] blk.10.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  236. [ 176/1025] blk.11.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  237. [ 177/1025] blk.11.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  238. [ 178/1025] blk.11.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  239. [ 179/1025] blk.11.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  240. [ 180/1025] blk.11.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  241. [ 181/1025] blk.11.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  242. [ 182/1025] blk.11.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  243. [ 183/1025] blk.11.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  244. [ 184/1025] blk.11.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  245. [ 185/1025] blk.11.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  246. [ 186/1025] blk.11.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  247. [ 187/1025] blk.11.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  248. [ 188/1025] blk.11.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  249. [ 189/1025] blk.11.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  250. [ 190/1025] blk.11.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  251. [ 191/1025] blk.11.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  252. [ 192/1025] blk.11.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  253. [ 193/1025] blk.12.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  254. [ 194/1025] blk.12.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  255. [ 195/1025] blk.12.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  256. [ 196/1025] blk.12.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  257. [ 197/1025] blk.12.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  258. [ 198/1025] blk.12.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  259. [ 199/1025] blk.12.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  260. [ 200/1025] blk.12.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  261. [ 201/1025] blk.12.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  262. [ 202/1025] blk.12.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  263. [ 203/1025] blk.12.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  264. [ 204/1025] blk.12.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  265. [ 205/1025] blk.12.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  266. [ 206/1025] blk.12.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  267. [ 207/1025] blk.12.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  268. [ 208/1025] blk.12.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  269. [ 209/1025] blk.12.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  270. [ 210/1025] blk.13.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  271. [ 211/1025] blk.13.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  272. [ 212/1025] blk.13.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  273. [ 213/1025] blk.13.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  274. [ 214/1025] blk.13.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  275. [ 215/1025] blk.13.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  276. [ 216/1025] blk.13.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  277. [ 217/1025] blk.13.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  278. [ 218/1025] blk.13.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  279. [ 219/1025] blk.13.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  280. [ 220/1025] blk.13.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  281. [ 221/1025] blk.13.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  282. [ 222/1025] blk.13.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  283. [ 223/1025] blk.13.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  284. [ 224/1025] blk.13.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  285. [ 225/1025] blk.13.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  286. [ 226/1025] blk.13.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  287. [ 227/1025] blk.14.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  288. [ 228/1025] blk.14.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  289. [ 229/1025] blk.14.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  290. [ 230/1025] blk.14.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  291. [ 231/1025] blk.14.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  292. [ 232/1025] blk.14.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  293. [ 233/1025] blk.14.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  294. [ 234/1025] blk.14.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  295. [ 235/1025] blk.14.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  296. [ 236/1025] blk.14.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  297. [ 237/1025] blk.14.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  298. [ 238/1025] blk.14.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  299. [ 239/1025] blk.14.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  300. [ 240/1025] blk.14.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  301. [ 241/1025] blk.14.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  302. [ 242/1025] blk.14.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  303. [ 243/1025] blk.14.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  304. [ 244/1025] blk.15.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  305. [ 245/1025] blk.15.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  306. [ 246/1025] blk.15.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  307. [ 247/1025] blk.15.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  308. [ 248/1025] blk.15.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  309. [ 249/1025] blk.15.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  310. [ 250/1025] blk.15.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  311. [ 251/1025] blk.15.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  312. [ 252/1025] blk.15.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  313. [ 253/1025] blk.15.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  314. [ 254/1025] blk.15.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  315. [ 255/1025] blk.15.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  316. [ 256/1025] blk.15.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  317. [ 257/1025] blk.15.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  318. [ 258/1025] blk.15.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  319. [ 259/1025] blk.15.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  320. [ 260/1025] blk.15.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  321. [ 261/1025] blk.16.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  322. [ 262/1025] blk.16.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  323. [ 263/1025] blk.16.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  324. [ 264/1025] blk.16.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  325. [ 265/1025] blk.16.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  326. [ 266/1025] blk.16.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  327. [ 267/1025] blk.16.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  328. [ 268/1025] blk.16.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  329. [ 269/1025] blk.16.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  330. [ 270/1025] blk.16.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  331. [ 271/1025] blk.16.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  332. [ 272/1025] blk.16.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  333. [ 273/1025] blk.16.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  334. [ 274/1025] blk.16.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  335. [ 275/1025] blk.16.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  336. [ 276/1025] blk.16.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  337. [ 277/1025] blk.16.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  338. [ 278/1025] blk.17.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  339. [ 279/1025] blk.17.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  340. [ 280/1025] blk.17.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  341. [ 281/1025] blk.17.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  342. [ 282/1025] blk.17.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  343. [ 283/1025] blk.17.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  344. [ 284/1025] blk.17.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  345. [ 285/1025] blk.17.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  346. [ 286/1025] blk.17.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  347. [ 287/1025] blk.17.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  348. [ 288/1025] blk.17.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  349. [ 289/1025] blk.17.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  350. [ 290/1025] blk.17.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  351. [ 291/1025] blk.17.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  352. [ 292/1025] blk.17.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  353. [ 293/1025] blk.17.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  354. [ 294/1025] blk.17.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  355. [ 295/1025] blk.18.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  356. [ 296/1025] blk.18.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  357. [ 297/1025] blk.18.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  358. [ 298/1025] blk.18.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  359. [ 299/1025] blk.18.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  360. [ 300/1025] blk.18.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  361. [ 301/1025] blk.18.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  362. [ 302/1025] blk.18.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  363. [ 303/1025] blk.18.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  364. [ 304/1025] blk.18.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  365. [ 305/1025] blk.18.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  366. [ 306/1025] blk.18.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  367. [ 307/1025] blk.18.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  368. [ 308/1025] blk.18.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  369. [ 309/1025] blk.18.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  370. [ 310/1025] blk.18.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  371. [ 311/1025] blk.18.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  372. [ 312/1025] blk.19.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  373. [ 313/1025] blk.19.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  374. [ 314/1025] blk.19.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  375. [ 315/1025] blk.19.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  376. [ 316/1025] blk.19.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  377. [ 317/1025] blk.19.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  378. [ 318/1025] blk.19.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  379. [ 319/1025] blk.19.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  380. [ 320/1025] blk.19.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  381. [ 321/1025] blk.19.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  382. [ 322/1025] blk.19.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  383. [ 323/1025] blk.19.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  384. [ 324/1025] blk.19.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  385. [ 325/1025] blk.19.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  386. [ 326/1025] blk.19.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  387. [ 327/1025] blk.19.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  388. [ 328/1025] blk.19.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  389. [ 329/1025] blk.20.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  390. [ 330/1025] blk.20.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  391. [ 331/1025] blk.20.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  392. [ 332/1025] blk.20.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  393. [ 333/1025] blk.20.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  394. [ 334/1025] blk.20.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  395. [ 335/1025] blk.20.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  396. [ 336/1025] blk.20.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  397. [ 337/1025] blk.20.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  398. [ 338/1025] blk.20.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  399. [ 339/1025] blk.20.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  400. [ 340/1025] blk.20.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  401. [ 341/1025] blk.20.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  402. [ 342/1025] blk.20.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  403. [ 343/1025] blk.20.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  404. [ 344/1025] blk.20.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  405. [ 345/1025] blk.20.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  406. [ 346/1025] blk.21.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  407. [ 347/1025] blk.21.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  408. [ 348/1025] blk.21.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  409. [ 349/1025] blk.21.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  410. [ 350/1025] blk.21.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  411. [ 351/1025] blk.21.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  412. [ 352/1025] blk.21.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  413. [ 353/1025] blk.21.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  414. [ 354/1025] blk.21.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  415. [ 355/1025] blk.21.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  416. [ 356/1025] blk.21.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  417. [ 357/1025] blk.21.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  418. [ 358/1025] blk.21.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  419. [ 359/1025] blk.21.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  420. [ 360/1025] blk.21.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  421. [ 361/1025] blk.21.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  422. [ 362/1025] blk.21.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  423. [ 363/1025] blk.22.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  424. [ 364/1025] blk.22.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  425. [ 365/1025] blk.22.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  426. [ 366/1025] blk.22.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  427. [ 367/1025] blk.22.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  428. [ 368/1025] blk.22.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  429. [ 369/1025] blk.22.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  430. [ 370/1025] blk.22.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  431. [ 371/1025] blk.22.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  432. [ 372/1025] blk.22.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  433. [ 373/1025] blk.22.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  434. [ 374/1025] blk.22.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  435. [ 375/1025] blk.22.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  436. [ 376/1025] blk.22.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  437. [ 377/1025] blk.22.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  438. [ 378/1025] blk.22.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  439. [ 379/1025] blk.22.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  440. [ 380/1025] blk.23.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  441. [ 381/1025] blk.23.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  442. [ 382/1025] blk.23.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  443. [ 383/1025] blk.23.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  444. [ 384/1025] blk.23.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  445. [ 385/1025] blk.23.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  446. [ 386/1025] blk.23.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  447. [ 387/1025] blk.23.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  448. [ 388/1025] blk.23.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  449. [ 389/1025] blk.23.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  450. [ 390/1025] blk.23.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  451. [ 391/1025] blk.23.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  452. [ 392/1025] blk.23.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  453. [ 393/1025] blk.23.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  454. [ 394/1025] blk.23.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  455. [ 395/1025] blk.23.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  456. [ 396/1025] blk.23.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  457. [ 397/1025] blk.24.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  458. [ 398/1025] blk.24.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  459. [ 399/1025] blk.24.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  460. [ 400/1025] blk.24.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  461. [ 401/1025] blk.24.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  462. [ 402/1025] blk.24.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  463. [ 403/1025] blk.24.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  464. [ 404/1025] blk.24.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  465. [ 405/1025] blk.24.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  466. [ 406/1025] blk.24.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  467. [ 407/1025] blk.24.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  468. [ 408/1025] blk.24.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  469. [ 409/1025] blk.24.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  470. [ 410/1025] blk.24.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  471. [ 411/1025] blk.24.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  472. [ 412/1025] blk.24.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  473. [ 413/1025] blk.24.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  474. [ 414/1025] blk.25.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  475. [ 415/1025] blk.25.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  476. [ 416/1025] blk.25.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  477. [ 417/1025] blk.25.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  478. [ 418/1025] blk.25.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  479. [ 419/1025] blk.25.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  480. [ 420/1025] blk.25.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  481. [ 421/1025] blk.25.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  482. [ 422/1025] blk.25.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  483. [ 423/1025] blk.25.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  484. [ 424/1025] blk.25.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  485. [ 425/1025] blk.25.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  486. [ 426/1025] blk.25.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  487. [ 427/1025] blk.25.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  488. [ 428/1025] blk.25.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  489. [ 429/1025] blk.25.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  490. [ 430/1025] blk.25.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  491. [ 431/1025] blk.26.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  492. [ 432/1025] blk.26.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  493. [ 433/1025] blk.26.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  494. [ 434/1025] blk.26.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  495. [ 435/1025] blk.26.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  496. [ 436/1025] blk.26.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  497. [ 437/1025] blk.26.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  498. [ 438/1025] blk.26.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  499. [ 439/1025] blk.26.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  500. [ 440/1025] blk.26.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  501. [ 441/1025] blk.26.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  502. [ 442/1025] blk.26.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  503. [ 443/1025] blk.26.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  504. [ 444/1025] blk.26.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  505. [ 445/1025] blk.26.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  506. [ 446/1025] blk.26.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  507. [ 447/1025] blk.26.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  508. [ 448/1025] blk.27.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  509. [ 449/1025] blk.27.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  510. [ 450/1025] blk.27.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  511. [ 451/1025] blk.27.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  512. [ 452/1025] blk.27.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  513. [ 453/1025] blk.27.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  514. [ 454/1025] blk.27.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  515. [ 455/1025] blk.27.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  516. [ 456/1025] blk.27.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  517. [ 457/1025] blk.27.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  518. [ 458/1025] blk.27.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  519. [ 459/1025] blk.27.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  520. [ 460/1025] blk.27.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  521. [ 461/1025] blk.27.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  522. [ 462/1025] blk.27.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  523. [ 463/1025] blk.27.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  524. [ 464/1025] blk.27.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  525. [ 465/1025] blk.28.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  526. [ 466/1025] blk.28.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  527. [ 467/1025] blk.28.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  528. [ 468/1025] blk.28.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  529. [ 469/1025] blk.28.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  530. [ 470/1025] blk.28.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  531. [ 471/1025] blk.28.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  532. [ 472/1025] blk.28.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  533. [ 473/1025] blk.28.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  534. [ 474/1025] blk.28.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  535. [ 475/1025] blk.28.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  536. [ 476/1025] blk.28.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  537. [ 477/1025] blk.28.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  538. [ 478/1025] blk.28.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  539. [ 479/1025] blk.28.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  540. [ 480/1025] blk.28.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  541. [ 481/1025] blk.28.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  542. [ 482/1025] blk.29.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  543. [ 483/1025] blk.29.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  544. [ 484/1025] blk.29.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  545. [ 485/1025] blk.29.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  546. [ 486/1025] blk.29.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  547. [ 487/1025] blk.29.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  548. [ 488/1025] blk.29.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  549. [ 489/1025] blk.29.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  550. [ 490/1025] blk.29.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  551. [ 491/1025] blk.29.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  552. [ 492/1025] blk.29.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  553. [ 493/1025] blk.29.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  554. [ 494/1025] blk.29.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  555. [ 495/1025] blk.29.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  556. [ 496/1025] blk.29.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  557. [ 497/1025] blk.29.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  558. [ 498/1025] blk.29.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  559. [ 499/1025] blk.30.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  560. [ 500/1025] blk.30.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  561. [ 501/1025] blk.30.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  562. [ 502/1025] blk.30.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  563. [ 503/1025] blk.30.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  564. [ 504/1025] blk.30.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  565. [ 505/1025] blk.30.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  566. [ 506/1025] blk.30.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  567. [ 507/1025] blk.30.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  568. [ 508/1025] blk.30.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  569. [ 509/1025] blk.30.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  570. [ 510/1025] blk.30.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  571. [ 511/1025] blk.30.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  572. [ 512/1025] blk.30.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  573. [ 513/1025] blk.30.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  574. [ 514/1025] blk.30.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  575. [ 515/1025] blk.30.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  576. [ 516/1025] blk.31.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  577. [ 517/1025] blk.31.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  578. [ 518/1025] blk.31.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  579. [ 519/1025] blk.31.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  580. [ 520/1025] blk.31.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  581. [ 521/1025] blk.31.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  582. [ 522/1025] blk.31.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  583. [ 523/1025] blk.31.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  584. [ 524/1025] blk.31.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  585. [ 525/1025] blk.31.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  586. [ 526/1025] blk.31.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  587. [ 527/1025] blk.31.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  588. [ 528/1025] blk.31.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  589. [ 529/1025] blk.31.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  590. [ 530/1025] blk.31.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  591. [ 531/1025] blk.31.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  592. [ 532/1025] blk.31.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  593. [ 533/1025] blk.32.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  594. [ 534/1025] blk.32.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  595. [ 535/1025] blk.32.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  596. [ 536/1025] blk.32.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  597. [ 537/1025] blk.32.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  598. [ 538/1025] blk.32.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  599. [ 539/1025] blk.32.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  600. [ 540/1025] blk.32.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  601. [ 541/1025] blk.32.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  602. [ 542/1025] blk.32.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  603. [ 543/1025] blk.32.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  604. [ 544/1025] blk.32.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  605. [ 545/1025] blk.32.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  606. [ 546/1025] blk.32.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  607. [ 547/1025] blk.32.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  608. [ 548/1025] blk.32.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  609. [ 549/1025] blk.32.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  610. [ 550/1025] blk.33.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  611. [ 551/1025] blk.33.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  612. [ 552/1025] blk.33.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  613. [ 553/1025] blk.33.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  614. [ 554/1025] blk.33.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  615. [ 555/1025] blk.33.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  616. [ 556/1025] blk.33.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  617. [ 557/1025] blk.33.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  618. [ 558/1025] blk.33.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  619. [ 559/1025] blk.33.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  620. [ 560/1025] blk.33.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  621. [ 561/1025] blk.33.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  622. [ 562/1025] blk.33.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  623. [ 563/1025] blk.33.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  624. [ 564/1025] blk.33.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  625. [ 565/1025] blk.33.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  626. [ 566/1025] blk.33.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  627. [ 567/1025] blk.34.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  628. [ 568/1025] blk.34.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  629. [ 569/1025] blk.34.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  630. [ 570/1025] blk.34.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  631. [ 571/1025] blk.34.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  632. [ 572/1025] blk.34.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  633. [ 573/1025] blk.34.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  634. [ 574/1025] blk.34.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  635. [ 575/1025] blk.34.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  636. [ 576/1025] blk.34.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  637. [ 577/1025] blk.34.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  638. [ 578/1025] blk.34.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  639. [ 579/1025] blk.34.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  640. [ 580/1025] blk.34.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  641. [ 581/1025] blk.34.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  642. [ 582/1025] blk.34.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  643. [ 583/1025] blk.34.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  644. [ 584/1025] blk.35.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  645. [ 585/1025] blk.35.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  646. [ 586/1025] blk.35.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  647. [ 587/1025] blk.35.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  648. [ 588/1025] blk.35.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  649. [ 589/1025] blk.35.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  650. [ 590/1025] blk.35.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  651. [ 591/1025] blk.35.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  652. [ 592/1025] blk.35.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  653. [ 593/1025] blk.35.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  654. [ 594/1025] blk.35.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  655. [ 595/1025] blk.35.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  656. [ 596/1025] blk.35.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  657. [ 597/1025] blk.35.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  658. [ 598/1025] blk.35.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  659. [ 599/1025] blk.35.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  660. [ 600/1025] blk.35.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  661. [ 601/1025] blk.36.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  662. [ 602/1025] blk.36.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  663. [ 603/1025] blk.36.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  664. [ 604/1025] blk.36.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  665. [ 605/1025] blk.36.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  666. [ 606/1025] blk.36.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  667. [ 607/1025] blk.36.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  668. [ 608/1025] blk.36.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  669. [ 609/1025] blk.36.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  670. [ 610/1025] blk.36.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  671. [ 611/1025] blk.36.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  672. [ 612/1025] blk.36.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  673. [ 613/1025] blk.36.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  674. [ 614/1025] blk.36.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  675. [ 615/1025] blk.36.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  676. [ 616/1025] blk.36.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  677. [ 617/1025] blk.36.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  678. [ 618/1025] blk.37.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  679. [ 619/1025] blk.37.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  680. [ 620/1025] blk.37.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  681. [ 621/1025] blk.37.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  682. [ 622/1025] blk.37.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  683. [ 623/1025] blk.37.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  684. [ 624/1025] blk.37.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  685. [ 625/1025] blk.37.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  686. [ 626/1025] blk.37.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  687. [ 627/1025] blk.37.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  688. [ 628/1025] blk.37.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  689. [ 629/1025] blk.37.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  690. [ 630/1025] blk.37.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  691. [ 631/1025] blk.37.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  692. [ 632/1025] blk.37.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  693. [ 633/1025] blk.37.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  694. [ 634/1025] blk.37.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  695. [ 635/1025] blk.38.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  696. [ 636/1025] blk.38.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  697. [ 637/1025] blk.38.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  698. [ 638/1025] blk.38.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  699. [ 639/1025] blk.38.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  700. [ 640/1025] blk.38.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  701. [ 641/1025] blk.38.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  702. [ 642/1025] blk.38.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  703. [ 643/1025] blk.38.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  704. [ 644/1025] blk.38.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  705. [ 645/1025] blk.38.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  706. [ 646/1025] blk.38.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  707. [ 647/1025] blk.38.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  708. [ 648/1025] blk.38.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  709. [ 649/1025] blk.38.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  710. [ 650/1025] blk.38.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  711. [ 651/1025] blk.38.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  712. [ 652/1025] blk.39.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  713. [ 653/1025] blk.39.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  714. [ 654/1025] blk.39.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  715. [ 655/1025] blk.39.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  716. [ 656/1025] blk.39.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  717. [ 657/1025] blk.39.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  718. [ 658/1025] blk.39.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  719. [ 659/1025] blk.39.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  720. [ 660/1025] blk.39.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  721. [ 661/1025] blk.39.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  722. [ 662/1025] blk.39.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  723. [ 663/1025] blk.39.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  724. [ 664/1025] blk.39.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  725. [ 665/1025] blk.39.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  726. [ 666/1025] blk.39.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  727. [ 667/1025] blk.39.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  728. [ 668/1025] blk.39.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  729. [ 669/1025] blk.40.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  730. [ 670/1025] blk.40.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  731. [ 671/1025] blk.40.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  732. [ 672/1025] blk.40.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  733. [ 673/1025] blk.40.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  734. [ 674/1025] blk.40.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  735. [ 675/1025] blk.40.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  736. [ 676/1025] blk.40.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  737. [ 677/1025] blk.40.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  738. [ 678/1025] blk.40.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  739. [ 679/1025] blk.40.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  740. [ 680/1025] blk.40.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  741. [ 681/1025] blk.40.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  742. [ 682/1025] blk.40.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  743. [ 683/1025] blk.40.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  744. [ 684/1025] blk.40.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  745. [ 685/1025] blk.40.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  746. [ 686/1025] blk.41.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  747. [ 687/1025] blk.41.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  748. [ 688/1025] blk.41.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  749. [ 689/1025] blk.41.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  750. [ 690/1025] blk.41.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  751. [ 691/1025] blk.41.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  752. [ 692/1025] blk.41.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  753. [ 693/1025] blk.41.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  754. [ 694/1025] blk.41.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  755. [ 695/1025] blk.41.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  756. [ 696/1025] blk.41.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  757. [ 697/1025] blk.41.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  758. [ 698/1025] blk.41.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  759. [ 699/1025] blk.41.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  760. [ 700/1025] blk.41.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  761. [ 701/1025] blk.41.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  762. [ 702/1025] blk.41.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  763. [ 703/1025] blk.42.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  764. [ 704/1025] blk.42.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  765. [ 705/1025] blk.42.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  766. [ 706/1025] blk.42.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  767. [ 707/1025] blk.42.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  768. [ 708/1025] blk.42.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  769. [ 709/1025] blk.42.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  770. [ 710/1025] blk.42.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  771. [ 711/1025] blk.42.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  772. [ 712/1025] blk.42.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  773. [ 713/1025] blk.42.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  774. [ 714/1025] blk.42.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  775. [ 715/1025] blk.42.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  776. [ 716/1025] blk.42.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  777. [ 717/1025] blk.42.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  778. [ 718/1025] blk.42.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  779. [ 719/1025] blk.42.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  780. [ 720/1025] blk.43.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  781. [ 721/1025] blk.43.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  782. [ 722/1025] blk.43.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  783. [ 723/1025] blk.43.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  784. [ 724/1025] blk.43.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  785. [ 725/1025] blk.43.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  786. [ 726/1025] blk.43.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  787. [ 727/1025] blk.43.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  788. [ 728/1025] blk.43.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  789. [ 729/1025] blk.43.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  790. [ 730/1025] blk.43.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  791. [ 731/1025] blk.43.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  792. [ 732/1025] blk.43.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  793. [ 733/1025] blk.43.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  794. [ 734/1025] blk.43.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  795. [ 735/1025] blk.43.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  796. [ 736/1025] blk.43.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  797. [ 737/1025] blk.44.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  798. [ 738/1025] blk.44.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  799. [ 739/1025] blk.44.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  800. [ 740/1025] blk.44.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  801. [ 741/1025] blk.44.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  802. [ 742/1025] blk.44.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  803. [ 743/1025] blk.44.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  804. [ 744/1025] blk.44.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  805. [ 745/1025] blk.44.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  806. [ 746/1025] blk.44.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  807. [ 747/1025] blk.44.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  808. [ 748/1025] blk.44.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  809. [ 749/1025] blk.44.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  810. [ 750/1025] blk.44.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  811. [ 751/1025] blk.44.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  812. [ 752/1025] blk.44.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  813. [ 753/1025] blk.44.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  814. [ 754/1025] blk.45.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  815. [ 755/1025] blk.45.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  816. [ 756/1025] blk.45.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  817. [ 757/1025] blk.45.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  818. [ 758/1025] blk.45.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  819. [ 759/1025] blk.45.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  820. [ 760/1025] blk.45.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  821. [ 761/1025] blk.45.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  822. [ 762/1025] blk.45.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  823. [ 763/1025] blk.45.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  824. [ 764/1025] blk.45.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  825. [ 765/1025] blk.45.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  826. [ 766/1025] blk.45.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  827. [ 767/1025] blk.45.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  828. [ 768/1025] blk.45.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  829. [ 769/1025] blk.45.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  830. [ 770/1025] blk.45.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  831. [ 771/1025] blk.46.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  832. [ 772/1025] blk.46.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  833. [ 773/1025] blk.46.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  834. [ 774/1025] blk.46.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  835. [ 775/1025] blk.46.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  836. [ 776/1025] blk.46.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  837. [ 777/1025] blk.46.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  838. [ 778/1025] blk.46.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  839. [ 779/1025] blk.46.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  840. [ 780/1025] blk.46.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  841. [ 781/1025] blk.46.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  842. [ 782/1025] blk.46.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  843. [ 783/1025] blk.46.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  844. [ 784/1025] blk.46.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  845. [ 785/1025] blk.46.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  846. [ 786/1025] blk.46.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  847. [ 787/1025] blk.46.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  848. [ 788/1025] blk.47.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  849. [ 789/1025] blk.47.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  850. [ 790/1025] blk.47.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  851. [ 791/1025] blk.47.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  852. [ 792/1025] blk.47.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  853. [ 793/1025] blk.47.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  854. [ 794/1025] blk.47.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  855. [ 795/1025] blk.47.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  856. [ 796/1025] blk.47.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  857. [ 797/1025] blk.47.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  858. [ 798/1025] blk.47.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  859. [ 799/1025] blk.47.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  860. [ 800/1025] blk.47.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  861. [ 801/1025] blk.47.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  862. [ 802/1025] blk.47.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  863. [ 803/1025] blk.47.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  864. [ 804/1025] blk.47.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  865. [ 805/1025] blk.48.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  866. [ 806/1025] blk.48.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  867. [ 807/1025] blk.48.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  868. [ 808/1025] blk.48.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  869. [ 809/1025] blk.48.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  870. [ 810/1025] blk.48.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  871. [ 811/1025] blk.48.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  872. [ 812/1025] blk.48.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  873. [ 813/1025] blk.48.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  874. [ 814/1025] blk.48.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  875. [ 815/1025] blk.48.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  876. [ 816/1025] blk.48.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  877. [ 817/1025] blk.48.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  878. [ 818/1025] blk.48.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  879. [ 819/1025] blk.48.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  880. [ 820/1025] blk.48.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  881. [ 821/1025] blk.48.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  882. [ 822/1025] blk.49.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  883. [ 823/1025] blk.49.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  884. [ 824/1025] blk.49.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  885. [ 825/1025] blk.49.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  886. [ 826/1025] blk.49.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  887. [ 827/1025] blk.49.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  888. [ 828/1025] blk.49.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  889. [ 829/1025] blk.49.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  890. [ 830/1025] blk.49.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  891. [ 831/1025] blk.49.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  892. [ 832/1025] blk.49.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  893. [ 833/1025] blk.49.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  894. [ 834/1025] blk.49.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  895. [ 835/1025] blk.49.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  896. [ 836/1025] blk.49.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  897. [ 837/1025] blk.49.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  898. [ 838/1025] blk.49.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  899. [ 839/1025] blk.50.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  900. [ 840/1025] blk.50.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  901. [ 841/1025] blk.50.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  902. [ 842/1025] blk.50.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  903. [ 843/1025] blk.50.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  904. [ 844/1025] blk.50.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  905. [ 845/1025] blk.50.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  906. [ 846/1025] blk.50.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  907. [ 847/1025] blk.50.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  908. [ 848/1025] blk.50.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  909. [ 849/1025] blk.50.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  910. [ 850/1025] blk.50.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  911. [ 851/1025] blk.50.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  912. [ 852/1025] blk.50.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  913. [ 853/1025] blk.50.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  914. [ 854/1025] blk.50.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  915. [ 855/1025] blk.50.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  916. [ 856/1025] blk.51.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  917. [ 857/1025] blk.51.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  918. [ 858/1025] blk.51.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  919. [ 859/1025] blk.51.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  920. [ 860/1025] blk.51.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  921. [ 861/1025] blk.51.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  922. [ 862/1025] blk.51.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  923. [ 863/1025] blk.51.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  924. [ 864/1025] blk.51.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  925. [ 865/1025] blk.51.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  926. [ 866/1025] blk.51.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  927. [ 867/1025] blk.51.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  928. [ 868/1025] blk.51.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  929. [ 869/1025] blk.51.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  930. [ 870/1025] blk.51.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  931. [ 871/1025] blk.51.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  932. [ 872/1025] blk.51.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  933. [ 873/1025] blk.52.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  934. [ 874/1025] blk.52.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  935. [ 875/1025] blk.52.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  936. [ 876/1025] blk.52.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  937. [ 877/1025] blk.52.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  938. [ 878/1025] blk.52.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  939. [ 879/1025] blk.52.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  940. [ 880/1025] blk.52.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  941. [ 881/1025] blk.52.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  942. [ 882/1025] blk.52.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  943. [ 883/1025] blk.52.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  944. [ 884/1025] blk.52.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  945. [ 885/1025] blk.52.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  946. [ 886/1025] blk.52.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  947. [ 887/1025] blk.52.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  948. [ 888/1025] blk.52.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  949. [ 889/1025] blk.52.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  950. [ 890/1025] blk.53.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  951. [ 891/1025] blk.53.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  952. [ 892/1025] blk.53.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  953. [ 893/1025] blk.53.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  954. [ 894/1025] blk.53.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  955. [ 895/1025] blk.53.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  956. [ 896/1025] blk.53.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  957. [ 897/1025] blk.53.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  958. [ 898/1025] blk.53.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  959. [ 899/1025] blk.53.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  960. [ 900/1025] blk.53.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  961. [ 901/1025] blk.53.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  962. [ 902/1025] blk.53.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  963. [ 903/1025] blk.53.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  964. [ 904/1025] blk.53.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  965. [ 905/1025] blk.53.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  966. [ 906/1025] blk.53.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  967. [ 907/1025] blk.54.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  968. [ 908/1025] blk.54.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  969. [ 909/1025] blk.54.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  970. [ 910/1025] blk.54.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  971. [ 911/1025] blk.54.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  972. [ 912/1025] blk.54.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  973. [ 913/1025] blk.54.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  974. [ 914/1025] blk.54.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  975. [ 915/1025] blk.54.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  976. [ 916/1025] blk.54.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  977. [ 917/1025] blk.54.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  978. [ 918/1025] blk.54.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  979. [ 919/1025] blk.54.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  980. [ 920/1025] blk.54.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  981. [ 921/1025] blk.54.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  982. [ 922/1025] blk.54.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  983. [ 923/1025] blk.54.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  984. [ 924/1025] blk.55.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  985. [ 925/1025] blk.55.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  986. [ 926/1025] blk.55.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  987. [ 927/1025] blk.55.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  988. [ 928/1025] blk.55.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  989. [ 929/1025] blk.55.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  990. [ 930/1025] blk.55.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  991. [ 931/1025] blk.55.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  992. [ 932/1025] blk.55.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  993. [ 933/1025] blk.55.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  994. [ 934/1025] blk.55.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  995. [ 935/1025] blk.55.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  996. [ 936/1025] blk.55.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  997. [ 937/1025] blk.55.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  998. [ 938/1025] blk.55.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  999. [ 939/1025] blk.55.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1000. [ 940/1025] blk.55.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1001. [ 941/1025] blk.56.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  1002. [ 942/1025] blk.56.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  1003. [ 943/1025] blk.56.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  1004. [ 944/1025] blk.56.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1005. [ 945/1025] blk.56.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  1006. [ 946/1025] blk.56.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  1007. [ 947/1025] blk.56.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  1008. [ 948/1025] blk.56.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  1009. [ 949/1025] blk.56.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  1010. [ 950/1025] blk.56.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  1011. [ 951/1025] blk.56.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  1012. [ 952/1025] blk.56.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1013. [ 953/1025] blk.56.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  1014. [ 954/1025] blk.56.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1015. [ 955/1025] blk.56.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1016. [ 956/1025] blk.56.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1017. [ 957/1025] blk.56.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1018. [ 958/1025] blk.57.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  1019. [ 959/1025] blk.57.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  1020. [ 960/1025] blk.57.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  1021. [ 961/1025] blk.57.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1022. [ 962/1025] blk.57.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  1023. [ 963/1025] blk.57.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  1024. [ 964/1025] blk.57.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  1025. [ 965/1025] blk.57.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  1026. [ 966/1025] blk.57.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  1027. [ 967/1025] blk.57.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  1028. [ 968/1025] blk.57.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  1029. [ 969/1025] blk.57.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1030. [ 970/1025] blk.57.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  1031. [ 971/1025] blk.57.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1032. [ 972/1025] blk.57.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1033. [ 973/1025] blk.57.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1034. [ 974/1025] blk.57.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1035. [ 975/1025] blk.58.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  1036. [ 976/1025] blk.58.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  1037. [ 977/1025] blk.58.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  1038. [ 978/1025] blk.58.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1039. [ 979/1025] blk.58.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  1040. [ 980/1025] blk.58.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  1041. [ 981/1025] blk.58.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  1042. [ 982/1025] blk.58.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  1043. [ 983/1025] blk.58.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  1044. [ 984/1025] blk.58.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  1045. [ 985/1025] blk.58.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  1046. [ 986/1025] blk.58.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1047. [ 987/1025] blk.58.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  1048. [ 988/1025] blk.58.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1049. [ 989/1025] blk.58.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1050. [ 990/1025] blk.58.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1051. [ 991/1025] blk.58.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1052. [ 992/1025] blk.59.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  1053. [ 993/1025] blk.59.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  1054. [ 994/1025] blk.59.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  1055. [ 995/1025] blk.59.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1056. [ 996/1025] blk.59.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  1057. [ 997/1025] blk.59.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  1058. [ 998/1025] blk.59.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  1059. [ 999/1025] blk.59.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  1060. [1000/1025] blk.59.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  1061. [1001/1025] blk.59.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  1062. [1002/1025] blk.59.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  1063. [1003/1025] blk.59.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1064. [1004/1025] blk.59.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  1065. [1005/1025] blk.59.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1066. [1006/1025] blk.59.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1067. [1007/1025] blk.59.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1068. [1008/1025] blk.59.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1069. [1009/1025] blk.60.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
  1070. [1010/1025] blk.60.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
  1071. [1011/1025] blk.60.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
  1072. [1012/1025] blk.60.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1073. [1013/1025] blk.60.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
  1074. [1014/1025] blk.60.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
  1075. [1015/1025] blk.60.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
  1076. [1016/1025] blk.60.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
  1077. [1017/1025] blk.60.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
  1078. [1018/1025] blk.60.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
  1079. [1019/1025] blk.60.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
  1080. [1020/1025] blk.60.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1081. [1021/1025] blk.60.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
  1082. [1022/1025] blk.60.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1083. [1023/1025] blk.60.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
  1084. [1024/1025] blk.60.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
  1085. [1025/1025] blk.60.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
  1086. llama_model_quantize_internal: model size = 386115.06 MB
  1087. llama_model_quantize_internal: quant size = 386115.06 MB
  1088.  
  1089. main: quantize time = 5230179.40 ms
  1090. main: total time = 5230179.40 ms
Advertisement
Add Comment
Please, Sign In to add comment