Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- > ~/pkgs/ik_llama.cpp/build/bin/llama-quantize --repack --repack-pattern exps /home/lissanro/pkgs/text-generation-webui/models/DeepSeek-V3-0324-GGUF-UD-Q4_K_XL/DeepSeek-V3-0324-UD-Q4_K_XL-00001-of-00009.gguf /mnt/secondary/tmp/DeepSeek-V3-0324-GGUF-UD-Q4_K_R4.gguf q4_k_r4
- main: build = 3630 (5f44f4b3)
- main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
- main: quantizing '/home/lissanro/pkgs/text-generation-webui/models/DeepSeek-V3-0324-GGUF-UD-Q4_K_XL/DeepSeek-V3-0324-UD-Q4_K_XL-00001-of-00009.gguf' to '/mnt/secondary/tmp/DeepSeek-V3-0324-GGUF-UD-Q4_K_R4.gguf' as Q4_K_R4
- llama_model_loader: additional 8 GGUFs metadata loaded.
- llama_model_loader: loaded meta data with 49 key-value pairs and 1025 tensors from /home/lissanro/pkgs/text-generation-webui/models/DeepSeek-V3-0324-GGUF-UD-Q4_K_XL/DeepSeek-V3-0324-UD-Q4_K_XL-00001-of-00009.gguf (version GGUF V3 (latest))
- llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
- llama_model_loader: - kv 0: general.architecture str = deepseek2
- llama_model_loader: - kv 1: general.type str = model
- llama_model_loader: - kv 2: general.name str = DeepSeek V3 0324 BF16
- llama_model_loader: - kv 3: general.quantized_by str = Unsloth
- llama_model_loader: - kv 4: general.size_label str = 256x20B
- llama_model_loader: - kv 5: general.license str = mit
- llama_model_loader: - kv 6: general.repo_url str = https://huggingface.co/unsloth
- llama_model_loader: - kv 7: deepseek2.block_count u32 = 61
- llama_model_loader: - kv 8: deepseek2.context_length u32 = 163840
- llama_model_loader: - kv 9: deepseek2.embedding_length u32 = 7168
- llama_model_loader: - kv 10: deepseek2.feed_forward_length u32 = 18432
- llama_model_loader: - kv 11: deepseek2.attention.head_count u32 = 128
- llama_model_loader: - kv 12: deepseek2.attention.head_count_kv u32 = 128
- llama_model_loader: - kv 13: deepseek2.rope.freq_base f32 = 10000.000000
- llama_model_loader: - kv 14: deepseek2.attention.layer_norm_rms_epsilon f32 = 0.000001
- llama_model_loader: - kv 15: deepseek2.expert_used_count u32 = 8
- llama_model_loader: - kv 16: deepseek2.leading_dense_block_count u32 = 3
- llama_model_loader: - kv 17: deepseek2.vocab_size u32 = 129280
- llama_model_loader: - kv 18: deepseek2.attention.q_lora_rank u32 = 1536
- llama_model_loader: - kv 19: deepseek2.attention.kv_lora_rank u32 = 512
- llama_model_loader: - kv 20: deepseek2.attention.key_length u32 = 192
- llama_model_loader: - kv 21: deepseek2.attention.value_length u32 = 128
- llama_model_loader: - kv 22: deepseek2.expert_feed_forward_length u32 = 2048
- llama_model_loader: - kv 23: deepseek2.expert_count u32 = 256
- llama_model_loader: - kv 24: deepseek2.expert_shared_count u32 = 1
- llama_model_loader: - kv 25: deepseek2.expert_weights_scale f32 = 2.500000
- llama_model_loader: - kv 26: deepseek2.expert_weights_norm bool = true
- llama_model_loader: - kv 27: deepseek2.expert_gating_func u32 = 2
- llama_model_loader: - kv 28: deepseek2.rope.dimension_count u32 = 64
- llama_model_loader: - kv 29: deepseek2.rope.scaling.type str = yarn
- llama_model_loader: - kv 30: deepseek2.rope.scaling.factor f32 = 40.000000
- llama_model_loader: - kv 31: deepseek2.rope.scaling.original_context_length u32 = 4096
- llama_model_loader: - kv 32: deepseek2.rope.scaling.yarn_log_multiplier f32 = 0.100000
- llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
- llama_model_loader: - kv 34: tokenizer.ggml.pre str = deepseek-v3
- llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,129280] = ["<|begin▁of▁sentence|>", "<�...
- llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,129280] = [3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
- llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,127741] = ["Ġ t", "Ġ a", "i n", "Ġ Ġ", "h e...
- llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 0
- llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 1
- llama_model_loader: - kv 40: tokenizer.ggml.padding_token_id u32 = 1
- llama_model_loader: - kv 41: tokenizer.ggml.add_bos_token bool = true
- llama_model_loader: - kv 42: tokenizer.ggml.add_eos_token bool = false
- llama_model_loader: - kv 43: tokenizer.chat_template str = {% if not add_generation_prompt is de...
- llama_model_loader: - kv 44: general.quantization_version u32 = 2
- llama_model_loader: - kv 45: general.file_type u32 = 15
- llama_model_loader: - kv 46: split.no u16 = 0
- llama_model_loader: - kv 47: split.tensors.count i32 = 1025
- llama_model_loader: - kv 48: split.count u16 = 9
- llama_model_loader: - type f32: 361 tensors
- llama_model_loader: - type q4_K: 453 tensors
- llama_model_loader: - type q6_K: 211 tensors
- ===================== Model ftype: Q4_K - Medium: Repacked ftype: Q4_K_R4
- [ 1/1025] output.weight - [ 7168, 129280, 1, 1], type = q6_K, size = 724.951 MB, type = q6_K
- [ 2/1025] output_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 3/1025] token_embd.weight - [ 7168, 129280, 1, 1], type = q4_K, size = 497.109 MB, type = q4_K
- [ 4/1025] blk.0.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 5/1025] blk.0.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 6/1025] blk.0.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 7/1025] blk.0.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 8/1025] blk.0.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 9/1025] blk.0.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 10/1025] blk.0.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 11/1025] blk.0.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 12/1025] blk.0.ffn_down.weight - [18432, 7168, 1, 1], type = q6_K, size = 103.359 MB, type = q6_K
- [ 13/1025] blk.0.ffn_gate.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
- [ 14/1025] blk.0.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 15/1025] blk.0.ffn_up.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
- [ 16/1025] blk.1.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 17/1025] blk.1.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 18/1025] blk.1.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 19/1025] blk.1.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 20/1025] blk.1.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 21/1025] blk.1.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 22/1025] blk.1.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 23/1025] blk.1.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 24/1025] blk.1.ffn_down.weight - [18432, 7168, 1, 1], type = q6_K, size = 103.359 MB, type = q6_K
- [ 25/1025] blk.1.ffn_gate.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
- [ 26/1025] blk.1.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 27/1025] blk.1.ffn_up.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
- [ 28/1025] blk.2.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 29/1025] blk.2.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 30/1025] blk.2.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 31/1025] blk.2.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 32/1025] blk.2.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 33/1025] blk.2.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 34/1025] blk.2.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 35/1025] blk.2.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 36/1025] blk.2.ffn_down.weight - [18432, 7168, 1, 1], type = q6_K, size = 103.359 MB, type = q6_K
- [ 37/1025] blk.2.ffn_gate.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
- [ 38/1025] blk.2.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 39/1025] blk.2.ffn_up.weight - [ 7168, 18432, 1, 1], type = q4_K, size = 70.875 MB, type = q4_K
- [ 40/1025] blk.3.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 41/1025] blk.3.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 42/1025] blk.3.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 43/1025] blk.3.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 44/1025] blk.3.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 45/1025] blk.3.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 46/1025] blk.3.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 47/1025] blk.3.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 48/1025] blk.3.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 49/1025] blk.3.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 50/1025] blk.3.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 51/1025] blk.3.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 52/1025] blk.3.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 53/1025] blk.3.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 54/1025] blk.3.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 55/1025] blk.3.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 56/1025] blk.3.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 57/1025] blk.4.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 58/1025] blk.4.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 59/1025] blk.4.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 60/1025] blk.4.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 61/1025] blk.4.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 62/1025] blk.4.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 63/1025] blk.4.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 64/1025] blk.4.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 65/1025] blk.4.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 66/1025] blk.4.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 67/1025] blk.4.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 68/1025] blk.4.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 69/1025] blk.4.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 70/1025] blk.4.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 71/1025] blk.4.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 72/1025] blk.4.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 73/1025] blk.4.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 74/1025] blk.5.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 75/1025] blk.5.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 76/1025] blk.5.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 77/1025] blk.5.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 78/1025] blk.5.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 79/1025] blk.5.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 80/1025] blk.5.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 81/1025] blk.5.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 82/1025] blk.5.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 83/1025] blk.5.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 84/1025] blk.5.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 85/1025] blk.5.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 86/1025] blk.5.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 87/1025] blk.5.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 88/1025] blk.5.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 89/1025] blk.5.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 90/1025] blk.5.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 91/1025] blk.6.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 92/1025] blk.6.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 93/1025] blk.6.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 94/1025] blk.6.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 95/1025] blk.6.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 96/1025] blk.6.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 97/1025] blk.6.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 98/1025] blk.6.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 99/1025] blk.6.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 100/1025] blk.6.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 101/1025] blk.6.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 102/1025] blk.6.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 103/1025] blk.6.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 104/1025] blk.6.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 105/1025] blk.6.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 106/1025] blk.6.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 107/1025] blk.6.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 108/1025] blk.7.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 109/1025] blk.7.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 110/1025] blk.7.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 111/1025] blk.7.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 112/1025] blk.7.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 113/1025] blk.7.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 114/1025] blk.7.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 115/1025] blk.7.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 116/1025] blk.7.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 117/1025] blk.7.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 118/1025] blk.7.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 119/1025] blk.7.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 120/1025] blk.7.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 121/1025] blk.7.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 122/1025] blk.7.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 123/1025] blk.7.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 124/1025] blk.7.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 125/1025] blk.8.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 126/1025] blk.8.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 127/1025] blk.8.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 128/1025] blk.8.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 129/1025] blk.8.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 130/1025] blk.8.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 131/1025] blk.8.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 132/1025] blk.8.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 133/1025] blk.8.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 134/1025] blk.8.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 135/1025] blk.8.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 136/1025] blk.8.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 137/1025] blk.8.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 138/1025] blk.8.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 139/1025] blk.8.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 140/1025] blk.8.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 141/1025] blk.8.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 142/1025] blk.9.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 143/1025] blk.9.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 144/1025] blk.9.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 145/1025] blk.9.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 146/1025] blk.9.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 147/1025] blk.9.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 148/1025] blk.9.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 149/1025] blk.9.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 150/1025] blk.9.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 151/1025] blk.9.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 152/1025] blk.9.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 153/1025] blk.9.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 154/1025] blk.9.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 155/1025] blk.9.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 156/1025] blk.9.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 157/1025] blk.9.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 158/1025] blk.9.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 159/1025] blk.10.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 160/1025] blk.10.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 161/1025] blk.10.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 162/1025] blk.10.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 163/1025] blk.10.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 164/1025] blk.10.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 165/1025] blk.10.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 166/1025] blk.10.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 167/1025] blk.10.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 168/1025] blk.10.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 169/1025] blk.10.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 170/1025] blk.10.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 171/1025] blk.10.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 172/1025] blk.10.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 173/1025] blk.10.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 174/1025] blk.10.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 175/1025] blk.10.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 176/1025] blk.11.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 177/1025] blk.11.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 178/1025] blk.11.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 179/1025] blk.11.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 180/1025] blk.11.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 181/1025] blk.11.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 182/1025] blk.11.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 183/1025] blk.11.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 184/1025] blk.11.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 185/1025] blk.11.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 186/1025] blk.11.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 187/1025] blk.11.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 188/1025] blk.11.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 189/1025] blk.11.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 190/1025] blk.11.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 191/1025] blk.11.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 192/1025] blk.11.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 193/1025] blk.12.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 194/1025] blk.12.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 195/1025] blk.12.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 196/1025] blk.12.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 197/1025] blk.12.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 198/1025] blk.12.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 199/1025] blk.12.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 200/1025] blk.12.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 201/1025] blk.12.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 202/1025] blk.12.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 203/1025] blk.12.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 204/1025] blk.12.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 205/1025] blk.12.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 206/1025] blk.12.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 207/1025] blk.12.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 208/1025] blk.12.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 209/1025] blk.12.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 210/1025] blk.13.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 211/1025] blk.13.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 212/1025] blk.13.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 213/1025] blk.13.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 214/1025] blk.13.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 215/1025] blk.13.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 216/1025] blk.13.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 217/1025] blk.13.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 218/1025] blk.13.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 219/1025] blk.13.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 220/1025] blk.13.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 221/1025] blk.13.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 222/1025] blk.13.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 223/1025] blk.13.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 224/1025] blk.13.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 225/1025] blk.13.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 226/1025] blk.13.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 227/1025] blk.14.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 228/1025] blk.14.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 229/1025] blk.14.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 230/1025] blk.14.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 231/1025] blk.14.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 232/1025] blk.14.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 233/1025] blk.14.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 234/1025] blk.14.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 235/1025] blk.14.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 236/1025] blk.14.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 237/1025] blk.14.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 238/1025] blk.14.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 239/1025] blk.14.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 240/1025] blk.14.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 241/1025] blk.14.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 242/1025] blk.14.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 243/1025] blk.14.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 244/1025] blk.15.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 245/1025] blk.15.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 246/1025] blk.15.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 247/1025] blk.15.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 248/1025] blk.15.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 249/1025] blk.15.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 250/1025] blk.15.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 251/1025] blk.15.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 252/1025] blk.15.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 253/1025] blk.15.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 254/1025] blk.15.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 255/1025] blk.15.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 256/1025] blk.15.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 257/1025] blk.15.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 258/1025] blk.15.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 259/1025] blk.15.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 260/1025] blk.15.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 261/1025] blk.16.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 262/1025] blk.16.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 263/1025] blk.16.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 264/1025] blk.16.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 265/1025] blk.16.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 266/1025] blk.16.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 267/1025] blk.16.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 268/1025] blk.16.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 269/1025] blk.16.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 270/1025] blk.16.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 271/1025] blk.16.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 272/1025] blk.16.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 273/1025] blk.16.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 274/1025] blk.16.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 275/1025] blk.16.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 276/1025] blk.16.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 277/1025] blk.16.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 278/1025] blk.17.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 279/1025] blk.17.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 280/1025] blk.17.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 281/1025] blk.17.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 282/1025] blk.17.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 283/1025] blk.17.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 284/1025] blk.17.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 285/1025] blk.17.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 286/1025] blk.17.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 287/1025] blk.17.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 288/1025] blk.17.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 289/1025] blk.17.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 290/1025] blk.17.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 291/1025] blk.17.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 292/1025] blk.17.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 293/1025] blk.17.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 294/1025] blk.17.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 295/1025] blk.18.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 296/1025] blk.18.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 297/1025] blk.18.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 298/1025] blk.18.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 299/1025] blk.18.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 300/1025] blk.18.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 301/1025] blk.18.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 302/1025] blk.18.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 303/1025] blk.18.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 304/1025] blk.18.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 305/1025] blk.18.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 306/1025] blk.18.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 307/1025] blk.18.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 308/1025] blk.18.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 309/1025] blk.18.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 310/1025] blk.18.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 311/1025] blk.18.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 312/1025] blk.19.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 313/1025] blk.19.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 314/1025] blk.19.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 315/1025] blk.19.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 316/1025] blk.19.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 317/1025] blk.19.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 318/1025] blk.19.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 319/1025] blk.19.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 320/1025] blk.19.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 321/1025] blk.19.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 322/1025] blk.19.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 323/1025] blk.19.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 324/1025] blk.19.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 325/1025] blk.19.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 326/1025] blk.19.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 327/1025] blk.19.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 328/1025] blk.19.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 329/1025] blk.20.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 330/1025] blk.20.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 331/1025] blk.20.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 332/1025] blk.20.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 333/1025] blk.20.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 334/1025] blk.20.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 335/1025] blk.20.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 336/1025] blk.20.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 337/1025] blk.20.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 338/1025] blk.20.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 339/1025] blk.20.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 340/1025] blk.20.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 341/1025] blk.20.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 342/1025] blk.20.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 343/1025] blk.20.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 344/1025] blk.20.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 345/1025] blk.20.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 346/1025] blk.21.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 347/1025] blk.21.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 348/1025] blk.21.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 349/1025] blk.21.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 350/1025] blk.21.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 351/1025] blk.21.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 352/1025] blk.21.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 353/1025] blk.21.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 354/1025] blk.21.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 355/1025] blk.21.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 356/1025] blk.21.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 357/1025] blk.21.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 358/1025] blk.21.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 359/1025] blk.21.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 360/1025] blk.21.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 361/1025] blk.21.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 362/1025] blk.21.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 363/1025] blk.22.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 364/1025] blk.22.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 365/1025] blk.22.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 366/1025] blk.22.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 367/1025] blk.22.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 368/1025] blk.22.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 369/1025] blk.22.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 370/1025] blk.22.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 371/1025] blk.22.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 372/1025] blk.22.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 373/1025] blk.22.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 374/1025] blk.22.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 375/1025] blk.22.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 376/1025] blk.22.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 377/1025] blk.22.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 378/1025] blk.22.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 379/1025] blk.22.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 380/1025] blk.23.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 381/1025] blk.23.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 382/1025] blk.23.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 383/1025] blk.23.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 384/1025] blk.23.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 385/1025] blk.23.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 386/1025] blk.23.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 387/1025] blk.23.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 388/1025] blk.23.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 389/1025] blk.23.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 390/1025] blk.23.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 391/1025] blk.23.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 392/1025] blk.23.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 393/1025] blk.23.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 394/1025] blk.23.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 395/1025] blk.23.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 396/1025] blk.23.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 397/1025] blk.24.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 398/1025] blk.24.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 399/1025] blk.24.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 400/1025] blk.24.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 401/1025] blk.24.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 402/1025] blk.24.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 403/1025] blk.24.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 404/1025] blk.24.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 405/1025] blk.24.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 406/1025] blk.24.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 407/1025] blk.24.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 408/1025] blk.24.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 409/1025] blk.24.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 410/1025] blk.24.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 411/1025] blk.24.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 412/1025] blk.24.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 413/1025] blk.24.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 414/1025] blk.25.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 415/1025] blk.25.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 416/1025] blk.25.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 417/1025] blk.25.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 418/1025] blk.25.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 419/1025] blk.25.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 420/1025] blk.25.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 421/1025] blk.25.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 422/1025] blk.25.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 423/1025] blk.25.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 424/1025] blk.25.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 425/1025] blk.25.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 426/1025] blk.25.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 427/1025] blk.25.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 428/1025] blk.25.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 429/1025] blk.25.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 430/1025] blk.25.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 431/1025] blk.26.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 432/1025] blk.26.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 433/1025] blk.26.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 434/1025] blk.26.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 435/1025] blk.26.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 436/1025] blk.26.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 437/1025] blk.26.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 438/1025] blk.26.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 439/1025] blk.26.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 440/1025] blk.26.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 441/1025] blk.26.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 442/1025] blk.26.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 443/1025] blk.26.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 444/1025] blk.26.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 445/1025] blk.26.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 446/1025] blk.26.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 447/1025] blk.26.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 448/1025] blk.27.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 449/1025] blk.27.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 450/1025] blk.27.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 451/1025] blk.27.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 452/1025] blk.27.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 453/1025] blk.27.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 454/1025] blk.27.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 455/1025] blk.27.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 456/1025] blk.27.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 457/1025] blk.27.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 458/1025] blk.27.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 459/1025] blk.27.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 460/1025] blk.27.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 461/1025] blk.27.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 462/1025] blk.27.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 463/1025] blk.27.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 464/1025] blk.27.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 465/1025] blk.28.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 466/1025] blk.28.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 467/1025] blk.28.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 468/1025] blk.28.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 469/1025] blk.28.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 470/1025] blk.28.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 471/1025] blk.28.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 472/1025] blk.28.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 473/1025] blk.28.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 474/1025] blk.28.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 475/1025] blk.28.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 476/1025] blk.28.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 477/1025] blk.28.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 478/1025] blk.28.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 479/1025] blk.28.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 480/1025] blk.28.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 481/1025] blk.28.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 482/1025] blk.29.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 483/1025] blk.29.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 484/1025] blk.29.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 485/1025] blk.29.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 486/1025] blk.29.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 487/1025] blk.29.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 488/1025] blk.29.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 489/1025] blk.29.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 490/1025] blk.29.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 491/1025] blk.29.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 492/1025] blk.29.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 493/1025] blk.29.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 494/1025] blk.29.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 495/1025] blk.29.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 496/1025] blk.29.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 497/1025] blk.29.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 498/1025] blk.29.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 499/1025] blk.30.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 500/1025] blk.30.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 501/1025] blk.30.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 502/1025] blk.30.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 503/1025] blk.30.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 504/1025] blk.30.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 505/1025] blk.30.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 506/1025] blk.30.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 507/1025] blk.30.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 508/1025] blk.30.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 509/1025] blk.30.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 510/1025] blk.30.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 511/1025] blk.30.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 512/1025] blk.30.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 513/1025] blk.30.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 514/1025] blk.30.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 515/1025] blk.30.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 516/1025] blk.31.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 517/1025] blk.31.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 518/1025] blk.31.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 519/1025] blk.31.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 520/1025] blk.31.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 521/1025] blk.31.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 522/1025] blk.31.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 523/1025] blk.31.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 524/1025] blk.31.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 525/1025] blk.31.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 526/1025] blk.31.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 527/1025] blk.31.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 528/1025] blk.31.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 529/1025] blk.31.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 530/1025] blk.31.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 531/1025] blk.31.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 532/1025] blk.31.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 533/1025] blk.32.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 534/1025] blk.32.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 535/1025] blk.32.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 536/1025] blk.32.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 537/1025] blk.32.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 538/1025] blk.32.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 539/1025] blk.32.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 540/1025] blk.32.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 541/1025] blk.32.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 542/1025] blk.32.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 543/1025] blk.32.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 544/1025] blk.32.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 545/1025] blk.32.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 546/1025] blk.32.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 547/1025] blk.32.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 548/1025] blk.32.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 549/1025] blk.32.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 550/1025] blk.33.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 551/1025] blk.33.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 552/1025] blk.33.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 553/1025] blk.33.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 554/1025] blk.33.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 555/1025] blk.33.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 556/1025] blk.33.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 557/1025] blk.33.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 558/1025] blk.33.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 559/1025] blk.33.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 560/1025] blk.33.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 561/1025] blk.33.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 562/1025] blk.33.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 563/1025] blk.33.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 564/1025] blk.33.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 565/1025] blk.33.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 566/1025] blk.33.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 567/1025] blk.34.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 568/1025] blk.34.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 569/1025] blk.34.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 570/1025] blk.34.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 571/1025] blk.34.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 572/1025] blk.34.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 573/1025] blk.34.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 574/1025] blk.34.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 575/1025] blk.34.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 576/1025] blk.34.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 577/1025] blk.34.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 578/1025] blk.34.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 579/1025] blk.34.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 580/1025] blk.34.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 581/1025] blk.34.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 582/1025] blk.34.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 583/1025] blk.34.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 584/1025] blk.35.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 585/1025] blk.35.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 586/1025] blk.35.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 587/1025] blk.35.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 588/1025] blk.35.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 589/1025] blk.35.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 590/1025] blk.35.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 591/1025] blk.35.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 592/1025] blk.35.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 593/1025] blk.35.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 594/1025] blk.35.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 595/1025] blk.35.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 596/1025] blk.35.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 597/1025] blk.35.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 598/1025] blk.35.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 599/1025] blk.35.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 600/1025] blk.35.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 601/1025] blk.36.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 602/1025] blk.36.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 603/1025] blk.36.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 604/1025] blk.36.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 605/1025] blk.36.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 606/1025] blk.36.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 607/1025] blk.36.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 608/1025] blk.36.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 609/1025] blk.36.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 610/1025] blk.36.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 611/1025] blk.36.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 612/1025] blk.36.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 613/1025] blk.36.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 614/1025] blk.36.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 615/1025] blk.36.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 616/1025] blk.36.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 617/1025] blk.36.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 618/1025] blk.37.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 619/1025] blk.37.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 620/1025] blk.37.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 621/1025] blk.37.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 622/1025] blk.37.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 623/1025] blk.37.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 624/1025] blk.37.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 625/1025] blk.37.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 626/1025] blk.37.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 627/1025] blk.37.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 628/1025] blk.37.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 629/1025] blk.37.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 630/1025] blk.37.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 631/1025] blk.37.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 632/1025] blk.37.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 633/1025] blk.37.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 634/1025] blk.37.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 635/1025] blk.38.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 636/1025] blk.38.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 637/1025] blk.38.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 638/1025] blk.38.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 639/1025] blk.38.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 640/1025] blk.38.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 641/1025] blk.38.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 642/1025] blk.38.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 643/1025] blk.38.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 644/1025] blk.38.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 645/1025] blk.38.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 646/1025] blk.38.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 647/1025] blk.38.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 648/1025] blk.38.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 649/1025] blk.38.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 650/1025] blk.38.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 651/1025] blk.38.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 652/1025] blk.39.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 653/1025] blk.39.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 654/1025] blk.39.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 655/1025] blk.39.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 656/1025] blk.39.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 657/1025] blk.39.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 658/1025] blk.39.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 659/1025] blk.39.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 660/1025] blk.39.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 661/1025] blk.39.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 662/1025] blk.39.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 663/1025] blk.39.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 664/1025] blk.39.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 665/1025] blk.39.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 666/1025] blk.39.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 667/1025] blk.39.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 668/1025] blk.39.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 669/1025] blk.40.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 670/1025] blk.40.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 671/1025] blk.40.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 672/1025] blk.40.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 673/1025] blk.40.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 674/1025] blk.40.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 675/1025] blk.40.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 676/1025] blk.40.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 677/1025] blk.40.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 678/1025] blk.40.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 679/1025] blk.40.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 680/1025] blk.40.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 681/1025] blk.40.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 682/1025] blk.40.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 683/1025] blk.40.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 684/1025] blk.40.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 685/1025] blk.40.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 686/1025] blk.41.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 687/1025] blk.41.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 688/1025] blk.41.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 689/1025] blk.41.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 690/1025] blk.41.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 691/1025] blk.41.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 692/1025] blk.41.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 693/1025] blk.41.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 694/1025] blk.41.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 695/1025] blk.41.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 696/1025] blk.41.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 697/1025] blk.41.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 698/1025] blk.41.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 699/1025] blk.41.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 700/1025] blk.41.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 701/1025] blk.41.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 702/1025] blk.41.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 703/1025] blk.42.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 704/1025] blk.42.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 705/1025] blk.42.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 706/1025] blk.42.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 707/1025] blk.42.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 708/1025] blk.42.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 709/1025] blk.42.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 710/1025] blk.42.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 711/1025] blk.42.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 712/1025] blk.42.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 713/1025] blk.42.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 714/1025] blk.42.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 715/1025] blk.42.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 716/1025] blk.42.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 717/1025] blk.42.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 718/1025] blk.42.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 719/1025] blk.42.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 720/1025] blk.43.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 721/1025] blk.43.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 722/1025] blk.43.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 723/1025] blk.43.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 724/1025] blk.43.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 725/1025] blk.43.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 726/1025] blk.43.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 727/1025] blk.43.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 728/1025] blk.43.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 729/1025] blk.43.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 730/1025] blk.43.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 731/1025] blk.43.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 732/1025] blk.43.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 733/1025] blk.43.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 734/1025] blk.43.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 735/1025] blk.43.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 736/1025] blk.43.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 737/1025] blk.44.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 738/1025] blk.44.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 739/1025] blk.44.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 740/1025] blk.44.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 741/1025] blk.44.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 742/1025] blk.44.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 743/1025] blk.44.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 744/1025] blk.44.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 745/1025] blk.44.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 746/1025] blk.44.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 747/1025] blk.44.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 748/1025] blk.44.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 749/1025] blk.44.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 750/1025] blk.44.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 751/1025] blk.44.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 752/1025] blk.44.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 753/1025] blk.44.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 754/1025] blk.45.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 755/1025] blk.45.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 756/1025] blk.45.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 757/1025] blk.45.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 758/1025] blk.45.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 759/1025] blk.45.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 760/1025] blk.45.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 761/1025] blk.45.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 762/1025] blk.45.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 763/1025] blk.45.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 764/1025] blk.45.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 765/1025] blk.45.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 766/1025] blk.45.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 767/1025] blk.45.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 768/1025] blk.45.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 769/1025] blk.45.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 770/1025] blk.45.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 771/1025] blk.46.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 772/1025] blk.46.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 773/1025] blk.46.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 774/1025] blk.46.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 775/1025] blk.46.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 776/1025] blk.46.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 777/1025] blk.46.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 778/1025] blk.46.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 779/1025] blk.46.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 780/1025] blk.46.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 781/1025] blk.46.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 782/1025] blk.46.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 783/1025] blk.46.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 784/1025] blk.46.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 785/1025] blk.46.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 786/1025] blk.46.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 787/1025] blk.46.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 788/1025] blk.47.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 789/1025] blk.47.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 790/1025] blk.47.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 791/1025] blk.47.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 792/1025] blk.47.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 793/1025] blk.47.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 794/1025] blk.47.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 795/1025] blk.47.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 796/1025] blk.47.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 797/1025] blk.47.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 798/1025] blk.47.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 799/1025] blk.47.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 800/1025] blk.47.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 801/1025] blk.47.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 802/1025] blk.47.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 803/1025] blk.47.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 804/1025] blk.47.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 805/1025] blk.48.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 806/1025] blk.48.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 807/1025] blk.48.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 808/1025] blk.48.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 809/1025] blk.48.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 810/1025] blk.48.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 811/1025] blk.48.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 812/1025] blk.48.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 813/1025] blk.48.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 814/1025] blk.48.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 815/1025] blk.48.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 816/1025] blk.48.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 817/1025] blk.48.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 818/1025] blk.48.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 819/1025] blk.48.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 820/1025] blk.48.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 821/1025] blk.48.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 822/1025] blk.49.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 823/1025] blk.49.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 824/1025] blk.49.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 825/1025] blk.49.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 826/1025] blk.49.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 827/1025] blk.49.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 828/1025] blk.49.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 829/1025] blk.49.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 830/1025] blk.49.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 831/1025] blk.49.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 832/1025] blk.49.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 833/1025] blk.49.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 834/1025] blk.49.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 835/1025] blk.49.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 836/1025] blk.49.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 837/1025] blk.49.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 838/1025] blk.49.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 839/1025] blk.50.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 840/1025] blk.50.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 841/1025] blk.50.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 842/1025] blk.50.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 843/1025] blk.50.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 844/1025] blk.50.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 845/1025] blk.50.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 846/1025] blk.50.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 847/1025] blk.50.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 848/1025] blk.50.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 849/1025] blk.50.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 850/1025] blk.50.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 851/1025] blk.50.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 852/1025] blk.50.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 853/1025] blk.50.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 854/1025] blk.50.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 855/1025] blk.50.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 856/1025] blk.51.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 857/1025] blk.51.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 858/1025] blk.51.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 859/1025] blk.51.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 860/1025] blk.51.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 861/1025] blk.51.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 862/1025] blk.51.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 863/1025] blk.51.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 864/1025] blk.51.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 865/1025] blk.51.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 866/1025] blk.51.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 867/1025] blk.51.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 868/1025] blk.51.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 869/1025] blk.51.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 870/1025] blk.51.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 871/1025] blk.51.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 872/1025] blk.51.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 873/1025] blk.52.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 874/1025] blk.52.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 875/1025] blk.52.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 876/1025] blk.52.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 877/1025] blk.52.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 878/1025] blk.52.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 879/1025] blk.52.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 880/1025] blk.52.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 881/1025] blk.52.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 882/1025] blk.52.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 883/1025] blk.52.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 884/1025] blk.52.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 885/1025] blk.52.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 886/1025] blk.52.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 887/1025] blk.52.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 888/1025] blk.52.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 889/1025] blk.52.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 890/1025] blk.53.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 891/1025] blk.53.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 892/1025] blk.53.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 893/1025] blk.53.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 894/1025] blk.53.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 895/1025] blk.53.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 896/1025] blk.53.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 897/1025] blk.53.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 898/1025] blk.53.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 899/1025] blk.53.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 900/1025] blk.53.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 901/1025] blk.53.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 902/1025] blk.53.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 903/1025] blk.53.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 904/1025] blk.53.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 905/1025] blk.53.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 906/1025] blk.53.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 907/1025] blk.54.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 908/1025] blk.54.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 909/1025] blk.54.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 910/1025] blk.54.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 911/1025] blk.54.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 912/1025] blk.54.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 913/1025] blk.54.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 914/1025] blk.54.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 915/1025] blk.54.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 916/1025] blk.54.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 917/1025] blk.54.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 918/1025] blk.54.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 919/1025] blk.54.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 920/1025] blk.54.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 921/1025] blk.54.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 922/1025] blk.54.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 923/1025] blk.54.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 924/1025] blk.55.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 925/1025] blk.55.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 926/1025] blk.55.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 927/1025] blk.55.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 928/1025] blk.55.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 929/1025] blk.55.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 930/1025] blk.55.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 931/1025] blk.55.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 932/1025] blk.55.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 933/1025] blk.55.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 934/1025] blk.55.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 935/1025] blk.55.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 936/1025] blk.55.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 937/1025] blk.55.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 938/1025] blk.55.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 939/1025] blk.55.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 940/1025] blk.55.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 941/1025] blk.56.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 942/1025] blk.56.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 943/1025] blk.56.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 944/1025] blk.56.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 945/1025] blk.56.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 946/1025] blk.56.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 947/1025] blk.56.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 948/1025] blk.56.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 949/1025] blk.56.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 950/1025] blk.56.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 951/1025] blk.56.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 952/1025] blk.56.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 953/1025] blk.56.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 954/1025] blk.56.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 955/1025] blk.56.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 956/1025] blk.56.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 957/1025] blk.56.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 958/1025] blk.57.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 959/1025] blk.57.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 960/1025] blk.57.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 961/1025] blk.57.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 962/1025] blk.57.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 963/1025] blk.57.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 964/1025] blk.57.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 965/1025] blk.57.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 966/1025] blk.57.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 967/1025] blk.57.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 968/1025] blk.57.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 969/1025] blk.57.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 970/1025] blk.57.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 971/1025] blk.57.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 972/1025] blk.57.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 973/1025] blk.57.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 974/1025] blk.57.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 975/1025] blk.58.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 976/1025] blk.58.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 977/1025] blk.58.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 978/1025] blk.58.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 979/1025] blk.58.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 980/1025] blk.58.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 981/1025] blk.58.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 982/1025] blk.58.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [ 983/1025] blk.58.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [ 984/1025] blk.58.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [ 985/1025] blk.58.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [ 986/1025] blk.58.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 987/1025] blk.58.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [ 988/1025] blk.58.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 989/1025] blk.58.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 990/1025] blk.58.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [ 991/1025] blk.58.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [ 992/1025] blk.59.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [ 993/1025] blk.59.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [ 994/1025] blk.59.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [ 995/1025] blk.59.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [ 996/1025] blk.59.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [ 997/1025] blk.59.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [ 998/1025] blk.59.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [ 999/1025] blk.59.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [1000/1025] blk.59.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [1001/1025] blk.59.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [1002/1025] blk.59.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [1003/1025] blk.59.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [1004/1025] blk.59.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [1005/1025] blk.59.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [1006/1025] blk.59.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [1007/1025] blk.59.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [1008/1025] blk.59.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [1009/1025] blk.60.attn_kv_a_mqa.weight - [ 7168, 576, 1, 1], type = q6_K, size = 3.230 MB, type = q6_K
- [1010/1025] blk.60.attn_kv_a_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MB, type = f32
- [1011/1025] blk.60.attn_kv_b.weight - [ 512, 32768, 1, 1], type = q6_K, size = 13.125 MB, type = q6_K
- [1012/1025] blk.60.attn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [1013/1025] blk.60.attn_output.weight - [16384, 7168, 1, 1], type = q4_K, size = 63.000 MB, type = q4_K
- [1014/1025] blk.60.attn_q_a.weight - [ 7168, 1536, 1, 1], type = q4_K, size = 5.906 MB, type = q4_K
- [1015/1025] blk.60.attn_q_a_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB, type = f32
- [1016/1025] blk.60.attn_q_b.weight - [ 1536, 24576, 1, 1], type = q4_K, size = 20.250 MB, type = q4_K
- [1017/1025] blk.60.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB, type = f32
- [1018/1025] blk.60.ffn_down_exps.weight - [ 2048, 7168, 256, 1], type = q6_K, size = 2940.000 MB, type = q6_k_r4
- [1019/1025] blk.60.ffn_down_shexp.weight - [ 2048, 7168, 1, 1], type = q6_K, size = 11.484 MB, type = q6_K
- [1020/1025] blk.60.ffn_gate_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [1021/1025] blk.60.ffn_gate_inp.weight - [ 7168, 256, 1, 1], type = f32, size = 7.000 MB, type = f32
- [1022/1025] blk.60.ffn_gate_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- [1023/1025] blk.60.ffn_norm.weight - [ 7168, 1, 1, 1], type = f32, size = 0.027 MB, type = f32
- [1024/1025] blk.60.ffn_up_exps.weight - [ 7168, 2048, 256, 1], type = q4_K, size = 2016.000 MB, type = q4_k_r4
- [1025/1025] blk.60.ffn_up_shexp.weight - [ 7168, 2048, 1, 1], type = q4_K, size = 7.875 MB, type = q4_K
- llama_model_quantize_internal: model size = 386115.06 MB
- llama_model_quantize_internal: quant size = 386115.06 MB
- main: quantize time = 5230179.40 ms
- main: total time = 5230179.40 ms
Advertisement
Add Comment
Please, Sign In to add comment