ExportedProgram:
    class GraphModule(torch.nn.Module):
        def forward(self, p_model_model_layers_slice_none__2__none____modules__0___input_layernorm_weight: "bf16[16]", p_model_model_layers_slice_none__2__none____modules__0___post_attention_layernorm_weight: "bf16[16]", p_model_model_layers_slice_none__2__none____modules__1___input_layernorm_weight: "bf16[16]", p_model_model_layers_slice_none__2__none____modules__1___post_attention_layernorm_weight: "bf16[16]", p_model_model_norm_weight: "bf16[16]", p_model_model_embed_tokens_weight: "bf16[32000, 16]", p_model_model_layers_slice_none__2__none____modules__0___self_attn_q_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__0___self_attn_k_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__0___self_attn_v_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__0___self_attn_o_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__0___mlp_gate_proj_weight: "bf16[64, 16]", p_model_model_layers_slice_none__2__none____modules__0___mlp_up_proj_weight: "bf16[64, 16]", p_model_model_layers_slice_none__2__none____modules__0___mlp_down_proj_weight: "bf16[16, 64]", p_model_model_layers_slice_none__2__none____modules__1___self_attn_q_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__1___self_attn_k_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__1___self_attn_v_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__1___self_attn_o_proj_weight: "bf16[16, 16]", p_model_model_layers_slice_none__2__none____modules__1___mlp_gate_proj_weight: "bf16[64, 16]", p_model_model_layers_slice_none__2__none____modules__1___mlp_up_proj_weight: "bf16[64, 16]", p_model_model_layers_slice_none__2__none____modules__1___mlp_down_proj_weight: "bf16[16, 64]", p_model_lm_head_weight: "bf16[32000, 16]", b_mask: "b8[1234, 1234]", b___static_cache_key_cache_0: "bf16[1, 4, 1234, 4]", b_model_model_rotary_emb_inv_freq: "f32[2]", b___static_cache_value_cache_0: "bf16[1, 4, 1234, 4]", b___static_cache_key_cache_1: "bf16[1, 4, 1234, 4]", b___static_cache_value_cache_1: "bf16[1, 4, 1234, 4]", input_ids: "i64[1, 1]", cache_position: "i64[1]"):
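            # The shapes decode a deliberately tiny 2-layer LLaMA: hidden_size 16, 4 heads of
            # head_dim 4 (so q/k/v/o are bf16[16, 16]), SwiGLU intermediate size 64, vocab 32000,
            # and a preallocated static KV cache of length 1234 per layer. p_* arguments are
            # parameters, b_* are buffers (mask, inv_freq, KV caches), and input_ids /
            # cache_position are the user inputs for a single decode step.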
            # File: /home/ilyas/transformers/src/transformers/integrations/executorch.py:112 in forward, code: attn_mask = self.mask[cache_position, :seqlen] if self.is_causal else None
            slice_1: "b8[1234, 1]" = torch.ops.aten.slice.Tensor(b_mask, 1, 0, 1); b_mask = None
            index: "b8[1, 1]" = torch.ops.aten.index.Tensor(slice_1, [cache_position]); slice_1 = None

            # File: /home/ilyas/transformers/src/transformers/integrations/executorch.py:113 in forward, code: position_ids = cache_position.unsqueeze(0)
            unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(cache_position, 0)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:550 in forward, code: inputs_embeds = self.embed_tokens(input_ids)
            embedding: "bf16[1, 1, 16]" = torch.ops.aten.embedding.default(p_model_model_embed_tokens_weight, input_ids, 31999); p_model_model_embed_tokens_weight = input_ids = None
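
            # The ops above fetch this step's attention-mask entry, position id, and token
            # embedding. A minimal eager-mode sketch of the embedding lookup (illustrative
            # stand-ins; _-prefixed names are not part of the exported graph; padding_idx=31999
            # matches the constant baked into aten.embedding above):
            _tok = torch.tensor([[42]], dtype=torch.int64)                       # like input_ids, i64[1, 1]
            _tbl = torch.zeros(32000, 16, dtype=torch.bfloat16)                  # embed_tokens.weight stand-in
            _emb = torch.nn.functional.embedding(_tok, _tbl, padding_idx=31999)  # bf16[1, 1, 16]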

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:728 in _prepare_4d_causal_attention_mask_with_cache_position, code: causal_mask = torch.full(
            full: "bf16[1, 1234]" = torch.ops.aten.full.default([1, 1234], -3.3895313892515355e+38, dtype = torch.bfloat16, device = device(type='cpu'), pin_memory = False)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:733 in _prepare_4d_causal_attention_mask_with_cache_position, code: causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
            arange: "i64[1234]" = torch.ops.aten.arange.default(1234, device = device(type='cpu'), pin_memory = False)
            view: "i64[1, 1]" = torch.ops.aten.view.default(cache_position, [-1, 1])
            gt: "b8[1, 1234]" = torch.ops.aten.gt.Tensor(arange, view); arange = view = None
            mul: "bf16[1, 1234]" = torch.ops.aten.mul.Tensor(full, gt); full = gt = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:736 in _prepare_4d_causal_attention_mask_with_cache_position, code: causal_mask = causal_mask.clone()  # copy to contiguous memory for in-place edit
            unsqueeze_3: "bf16[1, 1, 1234]" = torch.ops.aten.unsqueeze.default(mul, 0); mul = None
            unsqueeze_4: "bf16[1, 1, 1, 1234]" = torch.ops.aten.unsqueeze.default(unsqueeze_3, 1); unsqueeze_3 = None
            slice_4: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(unsqueeze_4, 2, 0, 9223372036854775807); unsqueeze_4 = None
            slice_5: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_4, 3, 0, 9223372036854775807); slice_4 = None
            expand_1: "bf16[1, 1, 1, 1234]" = torch.ops.aten.expand.default(slice_5, [1, 1, -1, -1]); slice_5 = None
            clone: "bf16[1, 1, 1, 1234]" = torch.ops.aten.clone.default(expand_1); expand_1 = None
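
            # full .. clone build the additive causal-mask row for the current position:
            # dtype-min (-3.39e38 is torch.finfo(torch.bfloat16).min) where a key position is
            # strictly in the future, 0 elsewhere, broadcast to [1, 1, 1, 1234]. Eager sketch
            # of the same row (illustrative; _-names are not part of the graph):
            _L = 1234
            _row = torch.full((1, _L), torch.finfo(torch.bfloat16).min, dtype=torch.bfloat16)
            _row = _row * (torch.arange(_L) > torch.tensor([[0]]))  # cache_position.reshape(-1, 1)
            _causal = _row[None, None, :, :].clone()                # bf16[1, 1, 1, 1234]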

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:738 in _prepare_4d_causal_attention_mask_with_cache_position, code: padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
            slice_6: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(clone, 0, 0, 9223372036854775807)
            slice_7: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_6, 1, 0, 9223372036854775807); slice_6 = None
            slice_8: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_7, 2, 0, 9223372036854775807); slice_7 = None
            slice_9: "bf16[1, 1, 1, 1]" = torch.ops.aten.slice.Tensor(slice_8, 3, 0, 1); slice_8 = None
            slice_10: "b8[1, 1]" = torch.ops.aten.slice.Tensor(index, 0, 0, 9223372036854775807); index = None
            unsqueeze_5: "b8[1, 1, 1]" = torch.ops.aten.unsqueeze.default(slice_10, 1); slice_10 = None
            unsqueeze_6: "b8[1, 1, 1, 1]" = torch.ops.aten.unsqueeze.default(unsqueeze_5, 2); unsqueeze_5 = None
            slice_11: "b8[1, 1, 1, 1]" = torch.ops.aten.slice.Tensor(unsqueeze_6, 3, 0, 9223372036854775807); unsqueeze_6 = None
            add: "bf16[1, 1, 1, 1]" = torch.ops.aten.add.Tensor(slice_9, slice_11); slice_9 = slice_11 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:739 in _prepare_4d_causal_attention_mask_with_cache_position, code: padding_mask = padding_mask == 0
            eq: "b8[1, 1, 1, 1]" = torch.ops.aten.eq.Scalar(add, 0); add = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:740 in _prepare_4d_causal_attention_mask_with_cache_position, code: causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
            slice_12: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(clone, 0, 0, 9223372036854775807)
            slice_13: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_12, 1, 0, 9223372036854775807); slice_12 = None
            slice_14: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_13, 2, 0, 9223372036854775807); slice_13 = None
            slice_15: "bf16[1, 1, 1, 1]" = torch.ops.aten.slice.Tensor(slice_14, 3, 0, 1); slice_14 = None
            masked_fill: "bf16[1, 1, 1, 1]" = torch.ops.aten.masked_fill.Scalar(slice_15, eq, -3.3895313892515355e+38); slice_15 = eq = None
            slice_16: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(clone, 0, 0, 9223372036854775807)
            slice_17: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_16, 1, 0, 9223372036854775807); slice_16 = None
            slice_18: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_17, 2, 0, 9223372036854775807); slice_17 = None
            slice_19: "bf16[1, 1, 1, 1]" = torch.ops.aten.slice.Tensor(slice_18, 3, 0, 1); slice_18 = None
            copy: "bf16[1, 1, 1, 1]" = torch.ops.aten.copy.default(slice_19, masked_fill); slice_19 = masked_fill = None
            slice_20: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(clone, 0, 0, 9223372036854775807)
            slice_21: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_20, 1, 0, 9223372036854775807)
            slice_22: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_21, 2, 0, 9223372036854775807)
            slice_scatter: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice_scatter.default(slice_22, copy, 3, 0, 1); slice_22 = copy = None
            slice_scatter_1: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice_scatter.default(slice_21, slice_scatter, 2, 0, 9223372036854775807); slice_21 = slice_scatter = None
            slice_scatter_2: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice_scatter.default(slice_20, slice_scatter_1, 1, 0, 9223372036854775807); slice_20 = slice_scatter_1 = None
            slice_scatter_3: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice_scatter.default(clone, slice_scatter_2, 0, 0, 9223372036854775807); clone = slice_scatter_2 = None
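
            # slice_12 .. slice_scatter_3 are the functionalized form of the in-place update
            # causal_mask[:, :, :, :mask_length] = ...masked_fill(padding_mask, min): export
            # cannot keep tensor mutation in the graph, so the write-back becomes a chain of
            # out-of-place slice_scatter ops that rebuild the full tensor. Eager sketch of one
            # such write-back (illustrative; _-names are not part of the graph):
            _dst = torch.zeros(1, 1, 1, 4, dtype=torch.bfloat16)
            _src = torch.full((1, 1, 1, 1), torch.finfo(torch.bfloat16).min, dtype=torch.bfloat16)
            _upd = torch.slice_scatter(_dst, _src, dim=3, start=0, end=1)  # _dst with [..., 0:1] replaced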

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:126 in forward, code: inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
            unsqueeze_7: "f32[1, 2]" = torch.ops.aten.unsqueeze.default(b_model_model_rotary_emb_inv_freq, 0); b_model_model_rotary_emb_inv_freq = None
            slice_27: "f32[1, 2]" = torch.ops.aten.slice.Tensor(unsqueeze_7, 1, 0, 9223372036854775807); unsqueeze_7 = None
            unsqueeze_8: "f32[1, 2, 1]" = torch.ops.aten.unsqueeze.default(slice_27, 2); slice_27 = None
            _to_copy: "f32[1, 2, 1]" = torch.ops.aten._to_copy.default(unsqueeze_8, dtype = torch.float32); unsqueeze_8 = None
            expand_2: "f32[1, 2, 1]" = torch.ops.aten.expand.default(_to_copy, [1, -1, 1]); _to_copy = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:127 in forward, code: position_ids_expanded = position_ids[:, None, :].float()
            slice_28: "i64[1, 1]" = torch.ops.aten.slice.Tensor(unsqueeze, 0, 0, 9223372036854775807); unsqueeze = None
            unsqueeze_9: "i64[1, 1, 1]" = torch.ops.aten.unsqueeze.default(slice_28, 1); slice_28 = None
            slice_29: "i64[1, 1, 1]" = torch.ops.aten.slice.Tensor(unsqueeze_9, 2, 0, 9223372036854775807); unsqueeze_9 = None
            _to_copy_1: "f32[1, 1, 1]" = torch.ops.aten._to_copy.default(slice_29, dtype = torch.float32); slice_29 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:132 in forward, code: freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
            _to_copy_2: "f32[1, 2, 1]" = torch.ops.aten._to_copy.default(expand_2, dtype = torch.float32); expand_2 = None
            _to_copy_3: "f32[1, 1, 1]" = torch.ops.aten._to_copy.default(_to_copy_1, dtype = torch.float32); _to_copy_1 = None
            expand_3: "f32[1, 2, 1]" = torch.ops.aten.expand.default(_to_copy_2, [1, 2, 1]); _to_copy_2 = None
            view_1: "f32[1, 2, 1]" = torch.ops.aten.view.default(expand_3, [1, 2, 1]); expand_3 = None
            expand_4: "f32[1, 1, 1]" = torch.ops.aten.expand.default(_to_copy_3, [1, 1, 1]); _to_copy_3 = None
            view_2: "f32[1, 1, 1]" = torch.ops.aten.view.default(expand_4, [1, 1, 1]); expand_4 = None
            bmm: "f32[1, 2, 1]" = torch.ops.aten.bmm.default(view_1, view_2); view_1 = view_2 = None
            view_3: "f32[1, 2, 1]" = torch.ops.aten.view.default(bmm, [1, 2, 1]); bmm = None
            transpose: "f32[1, 1, 2]" = torch.ops.aten.transpose.int(view_3, 1, 2); view_3 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:133 in forward, code: emb = torch.cat((freqs, freqs), dim=-1)
            cat: "f32[1, 1, 4]" = torch.ops.aten.cat.default([transpose, transpose], -1); transpose = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:134 in forward, code: cos = emb.cos()
            cos: "f32[1, 1, 4]" = torch.ops.aten.cos.default(cat)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:135 in forward, code: sin = emb.sin()
            sin: "f32[1, 1, 4]" = torch.ops.aten.sin.default(cat); cat = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:138 in forward, code: cos = cos * self.attention_scaling
            mul_1: "f32[1, 1, 4]" = torch.ops.aten.mul.Tensor(cos, 1.0); cos = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:139 in forward, code: sin = sin * self.attention_scaling
            mul_2: "f32[1, 1, 4]" = torch.ops.aten.mul.Tensor(sin, 1.0); sin = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:141 in forward, code: return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
            _to_copy_4: "bf16[1, 1, 4]" = torch.ops.aten._to_copy.default(mul_1, dtype = torch.bfloat16); mul_1 = None
            _to_copy_5: "bf16[1, 1, 4]" = torch.ops.aten._to_copy.default(mul_2, dtype = torch.bfloat16); mul_2 = None
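
            # The rotary block computes cos/sin for the single query position: freqs =
            # inv_freq[None, :, None] @ position[:, None, :], transposed, duplicated along the
            # last dim, scaled by attention_scaling (1.0 here), and cast to bf16. Eager sketch
            # (illustrative values; _-names are not part of the graph):
            _inv_freq = torch.tensor([1.0, 0.01])                       # f32[2] stand-in for inv_freq
            _pos = torch.tensor([[0.0]])                                # current position, f32[1, 1]
            _freqs = (_inv_freq[None, :, None] @ _pos[:, None, :]).transpose(1, 2)
            _embt = torch.cat((_freqs, _freqs), dim=-1)                 # f32[1, 1, 4]
            _cos, _sin = _embt.cos().to(torch.bfloat16), _embt.sin().to(torch.bfloat16)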

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:71 in forward, code: hidden_states = hidden_states.to(torch.float32)
            _to_copy_6: "f32[1, 1, 16]" = torch.ops.aten._to_copy.default(embedding, dtype = torch.float32)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:72 in forward, code: variance = hidden_states.pow(2).mean(-1, keepdim=True)
            pow_1: "f32[1, 1, 16]" = torch.ops.aten.pow.Tensor_Scalar(_to_copy_6, 2)
            mean: "f32[1, 1, 1]" = torch.ops.aten.mean.dim(pow_1, [-1], True); pow_1 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:73 in forward, code: hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
            add_1: "f32[1, 1, 1]" = torch.ops.aten.add.Tensor(mean, 1e-06); mean = None
            rsqrt: "f32[1, 1, 1]" = torch.ops.aten.rsqrt.default(add_1); add_1 = None
            mul_3: "f32[1, 1, 16]" = torch.ops.aten.mul.Tensor(_to_copy_6, rsqrt); _to_copy_6 = rsqrt = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:74 in forward, code: return self.weight * hidden_states.to(input_dtype)
            _to_copy_7: "bf16[1, 1, 16]" = torch.ops.aten._to_copy.default(mul_3, dtype = torch.bfloat16); mul_3 = None
            mul_4: "bf16[1, 1, 16]" = torch.ops.aten.mul.Tensor(p_model_model_layers_slice_none__2__none____modules__0___input_layernorm_weight, _to_copy_7); p_model_model_layers_slice_none__2__none____modules__0___input_layernorm_weight = _to_copy_7 = None
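
            # _to_copy_6 .. mul_4 are LlamaRMSNorm computed in fp32:
            # weight * (x * rsqrt(mean(x**2, dim=-1) + 1e-6)), cast back to bf16. Eager sketch
            # (illustrative; _-names are not part of the graph):
            _x = torch.randn(1, 1, 16, dtype=torch.bfloat16)
            _h32 = _x.to(torch.float32)
            _h32 = _h32 * torch.rsqrt(_h32.pow(2).mean(-1, keepdim=True) + 1e-6)
            _normed = torch.ones(16, dtype=torch.bfloat16) * _h32.to(torch.bfloat16)  # weight * h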

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:270 in forward, code: query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
            t: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___self_attn_q_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___self_attn_q_proj_weight = None
            view_4: "bf16[1, 16]" = torch.ops.aten.view.default(mul_4, [1, 16])
            mm: "bf16[1, 16]" = torch.ops.aten.mm.default(view_4, t); view_4 = t = None
            view_5: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm, [1, 1, 16]); mm = None
            view_6: "bf16[1, 1, 4, 4]" = torch.ops.aten.view.default(view_5, [1, 1, -1, 4]); view_5 = None
            transpose_1: "bf16[1, 4, 1, 4]" = torch.ops.aten.transpose.int(view_6, 1, 2); view_6 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:271 in forward, code: key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
            t_1: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___self_attn_k_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___self_attn_k_proj_weight = None
            view_7: "bf16[1, 16]" = torch.ops.aten.view.default(mul_4, [1, 16])
            mm_1: "bf16[1, 16]" = torch.ops.aten.mm.default(view_7, t_1); view_7 = t_1 = None
            view_8: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_1, [1, 1, 16]); mm_1 = None
            view_9: "bf16[1, 1, 4, 4]" = torch.ops.aten.view.default(view_8, [1, 1, -1, 4]); view_8 = None
            transpose_2: "bf16[1, 4, 1, 4]" = torch.ops.aten.transpose.int(view_9, 1, 2); view_9 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:272 in forward, code: value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
            t_2: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___self_attn_v_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___self_attn_v_proj_weight = None
            view_10: "bf16[1, 16]" = torch.ops.aten.view.default(mul_4, [1, 16]); mul_4 = None
            mm_2: "bf16[1, 16]" = torch.ops.aten.mm.default(view_10, t_2); view_10 = t_2 = None
            view_11: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_2, [1, 1, 16]); mm_2 = None
            view_12: "bf16[1, 1, 4, 4]" = torch.ops.aten.view.default(view_11, [1, 1, -1, 4]); view_11 = None
            transpose_3: "bf16[1, 4, 1, 4]" = torch.ops.aten.transpose.int(view_12, 1, 2); view_12 = None
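
            # Each of q/k/v above is a bias-free linear lowered to weight.t() + mm, followed
            # by a reshape that splits the 16-dim hidden state into 4 heads of head_dim 4 and
            # moves heads before the sequence dim. Eager sketch of one projection
            # (illustrative; _-names are not part of the graph):
            _hs = torch.randn(1, 1, 16, dtype=torch.bfloat16)
            _wq = torch.randn(16, 16, dtype=torch.bfloat16)            # q_proj.weight stand-in
            _q = (_hs @ _wq.t()).view(1, 1, 4, 4).transpose(1, 2)      # bf16[1, 4, 1, 4]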

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:171 in apply_rotary_pos_emb, code: cos = cos.unsqueeze(unsqueeze_dim)
            unsqueeze_10: "bf16[1, 1, 1, 4]" = torch.ops.aten.unsqueeze.default(_to_copy_4, 1)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:172 in apply_rotary_pos_emb, code: sin = sin.unsqueeze(unsqueeze_dim)
            unsqueeze_11: "bf16[1, 1, 1, 4]" = torch.ops.aten.unsqueeze.default(_to_copy_5, 1)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:173 in apply_rotary_pos_emb, code: q_embed = (q * cos) + (rotate_half(q) * sin)
            mul_5: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(transpose_1, unsqueeze_10)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:146 in rotate_half, code: x1 = x[..., : x.shape[-1] // 2]
            slice_30: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_1, 3, 0, 2)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:147 in rotate_half, code: x2 = x[..., x.shape[-1] // 2 :]
            slice_31: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_1, 3, 2, 9223372036854775807); transpose_1 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:148 in rotate_half, code: return torch.cat((-x2, x1), dim=-1)
            neg: "bf16[1, 4, 1, 2]" = torch.ops.aten.neg.default(slice_31); slice_31 = None
            cat_1: "bf16[1, 4, 1, 4]" = torch.ops.aten.cat.default([neg, slice_30], -1); neg = slice_30 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:173 in apply_rotary_pos_emb, code: q_embed = (q * cos) + (rotate_half(q) * sin)
            mul_6: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(cat_1, unsqueeze_11); cat_1 = None
            add_2: "bf16[1, 4, 1, 4]" = torch.ops.aten.add.Tensor(mul_5, mul_6); mul_5 = mul_6 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:174 in apply_rotary_pos_emb, code: k_embed = (k * cos) + (rotate_half(k) * sin)
            mul_7: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(transpose_2, unsqueeze_10); unsqueeze_10 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:146 in rotate_half, code: x1 = x[..., : x.shape[-1] // 2]
            slice_32: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_2, 3, 0, 2)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:147 in rotate_half, code: x2 = x[..., x.shape[-1] // 2 :]
            slice_33: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_2, 3, 2, 9223372036854775807); transpose_2 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:148 in rotate_half, code: return torch.cat((-x2, x1), dim=-1)
            neg_1: "bf16[1, 4, 1, 2]" = torch.ops.aten.neg.default(slice_33); slice_33 = None
            cat_2: "bf16[1, 4, 1, 4]" = torch.ops.aten.cat.default([neg_1, slice_32], -1); neg_1 = slice_32 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:174 in apply_rotary_pos_emb, code: k_embed = (k * cos) + (rotate_half(k) * sin)
            mul_8: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(cat_2, unsqueeze_11); cat_2 = unsqueeze_11 = None
            add_3: "bf16[1, 4, 1, 4]" = torch.ops.aten.add.Tensor(mul_7, mul_8); mul_7 = mul_8 = None
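
            # mul_5 .. add_3 unroll apply_rotary_pos_emb: q*cos + rotate_half(q)*sin (and the
            # same for k), where rotate_half negates the second half of the last dim and swaps
            # it in front of the first. Eager sketch (illustrative; _-names are not part of
            # the graph):
            def _rotate_half(x):  # mirrors modeling_llama.rotate_half, cited above
                _half = x.shape[-1] // 2
                return torch.cat((-x[..., _half:], x[..., :_half]), dim=-1)
            _qh = torch.randn(1, 4, 1, 4, dtype=torch.bfloat16)        # q after head split
            _cs = torch.randn(1, 1, 4, dtype=torch.bfloat16)           # cos from the rotary table
            _sn = torch.randn(1, 1, 4, dtype=torch.bfloat16)           # sin from the rotary table
            _q_rot = _qh * _cs.unsqueeze(1) + _rotate_half(_qh) * _sn.unsqueeze(1)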

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1182 in update, code: key_states = key_states.to(k_out.dtype)
            _to_copy_8: "bf16[1, 4, 1, 4]" = torch.ops.aten._to_copy.default(add_3, dtype = torch.bfloat16); add_3 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1183 in update, code: value_states = value_states.to(v_out.dtype)
            _to_copy_9: "bf16[1, 4, 1, 4]" = torch.ops.aten._to_copy.default(transpose_3, dtype = torch.bfloat16); transpose_3 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1193 in update, code: k_out.index_copy_(2, cache_position, key_states)
            index_copy: "bf16[1, 4, 1234, 4]" = torch.ops.aten.index_copy.default(b___static_cache_key_cache_0, 2, cache_position, _to_copy_8); b___static_cache_key_cache_0 = _to_copy_8 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1194 in update, code: v_out.index_copy_(2, cache_position, value_states)
            index_copy_1: "bf16[1, 4, 1234, 4]" = torch.ops.aten.index_copy.default(b___static_cache_value_cache_0, 2, cache_position, _to_copy_9); b___static_cache_value_cache_0 = _to_copy_9 = None
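
            # StaticCache.update writes this step's k/v into the preallocated [1, 4, 1234, 4]
            # buffers at cache_position along the sequence dim (2); export turns the in-place
            # index_copy_ into out-of-place index_copy and threads the mutated buffers through
            # to the outputs. Eager sketch (illustrative; _-names are not part of the graph):
            _k_cache = torch.zeros(1, 4, 1234, 4, dtype=torch.bfloat16)
            _k_new = torch.randn(1, 4, 1, 4, dtype=torch.bfloat16)
            _k_cache.index_copy_(2, torch.tensor([0]), _k_new)         # in-place in eager mode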

            # File: /home/ilyas/transformers/src/transformers/integrations/sdpa_attention.py:48 in sdpa_attention_forward, code: attn_output = torch.nn.functional.scaled_dot_product_attention(
            slice_37: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_scatter_3, 0, 0, 9223372036854775807)
            slice_38: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_37, 1, 0, 9223372036854775807); slice_37 = None
            slice_39: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_38, 2, 0, 9223372036854775807); slice_38 = None
            _scaled_dot_product_flash_attention_for_cpu = torch.ops.aten._scaled_dot_product_flash_attention_for_cpu.default(add_2, index_copy, index_copy_1, attn_mask = slice_39, scale = 0.5); add_2 = slice_39 = None
            getitem: "bf16[1, 4, 1, 4]" = _scaled_dot_product_flash_attention_for_cpu[0]; _scaled_dot_product_flash_attention_for_cpu = None
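
            # Attention proper: SDPA over the full-length cache with the additive mask row,
            # lowered here to the CPU flash-attention kernel; scale=0.5 is 1/sqrt(head_dim) =
            # 1/sqrt(4). Eager sketch (illustrative; _-names are not part of the graph):
            _attn = torch.nn.functional.scaled_dot_product_attention(
                torch.randn(1, 4, 1, 4, dtype=torch.bfloat16),         # query for this step
                torch.randn(1, 4, 1234, 4, dtype=torch.bfloat16),      # key cache
                torch.randn(1, 4, 1234, 4, dtype=torch.bfloat16),      # value cache
                attn_mask=torch.zeros(1, 1, 1, 1234, dtype=torch.bfloat16),
                scale=0.5,
            )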

            # File: /home/ilyas/transformers/src/transformers/integrations/sdpa_attention.py:57 in sdpa_attention_forward, code: attn_output = attn_output.transpose(1, 2).contiguous()
            transpose_4: "bf16[1, 1, 4, 4]" = torch.ops.aten.transpose.int(getitem, 1, 2); getitem = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:303 in forward, code: attn_output = attn_output.reshape(*input_shape, -1).contiguous()
            view_13: "bf16[1, 1, 16]" = torch.ops.aten.view.default(transpose_4, [1, 1, -1]); transpose_4 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:304 in forward, code: attn_output = self.o_proj(attn_output)
            t_3: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___self_attn_o_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___self_attn_o_proj_weight = None
            view_14: "bf16[1, 16]" = torch.ops.aten.view.default(view_13, [1, 16]); view_13 = None
            mm_3: "bf16[1, 16]" = torch.ops.aten.mm.default(view_14, t_3); view_14 = t_3 = None
            view_15: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_3, [1, 1, 16]); mm_3 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:347 in forward, code: hidden_states = residual + hidden_states
            add_4: "bf16[1, 1, 16]" = torch.ops.aten.add.Tensor(embedding, view_15); embedding = view_15 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:71 in forward, code: hidden_states = hidden_states.to(torch.float32)
            _to_copy_10: "f32[1, 1, 16]" = torch.ops.aten._to_copy.default(add_4, dtype = torch.float32)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:72 in forward, code: variance = hidden_states.pow(2).mean(-1, keepdim=True)
            pow_2: "f32[1, 1, 16]" = torch.ops.aten.pow.Tensor_Scalar(_to_copy_10, 2)
            mean_1: "f32[1, 1, 1]" = torch.ops.aten.mean.dim(pow_2, [-1], True); pow_2 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:73 in forward, code: hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
            add_5: "f32[1, 1, 1]" = torch.ops.aten.add.Tensor(mean_1, 1e-06); mean_1 = None
            rsqrt_1: "f32[1, 1, 1]" = torch.ops.aten.rsqrt.default(add_5); add_5 = None
            mul_9: "f32[1, 1, 16]" = torch.ops.aten.mul.Tensor(_to_copy_10, rsqrt_1); _to_copy_10 = rsqrt_1 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:74 in forward, code: return self.weight * hidden_states.to(input_dtype)
            _to_copy_11: "bf16[1, 1, 16]" = torch.ops.aten._to_copy.default(mul_9, dtype = torch.bfloat16); mul_9 = None
            mul_10: "bf16[1, 1, 16]" = torch.ops.aten.mul.Tensor(p_model_model_layers_slice_none__2__none____modules__0___post_attention_layernorm_weight, _to_copy_11); p_model_model_layers_slice_none__2__none____modules__0___post_attention_layernorm_weight = _to_copy_11 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:190 in forward, code: down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
            t_4: "bf16[16, 64]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___mlp_gate_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___mlp_gate_proj_weight = None
            view_16: "bf16[1, 16]" = torch.ops.aten.view.default(mul_10, [1, 16])
            mm_4: "bf16[1, 64]" = torch.ops.aten.mm.default(view_16, t_4); view_16 = t_4 = None
            view_17: "bf16[1, 1, 64]" = torch.ops.aten.view.default(mm_4, [1, 1, 64]); mm_4 = None
            silu: "bf16[1, 1, 64]" = torch.ops.aten.silu.default(view_17); view_17 = None
            t_5: "bf16[16, 64]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___mlp_up_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___mlp_up_proj_weight = None
            view_18: "bf16[1, 16]" = torch.ops.aten.view.default(mul_10, [1, 16]); mul_10 = None
            mm_5: "bf16[1, 64]" = torch.ops.aten.mm.default(view_18, t_5); view_18 = t_5 = None
            view_19: "bf16[1, 1, 64]" = torch.ops.aten.view.default(mm_5, [1, 1, 64]); mm_5 = None
            mul_11: "bf16[1, 1, 64]" = torch.ops.aten.mul.Tensor(silu, view_19); silu = view_19 = None
            t_6: "bf16[64, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__0___mlp_down_proj_weight); p_model_model_layers_slice_none__2__none____modules__0___mlp_down_proj_weight = None
            view_20: "bf16[1, 64]" = torch.ops.aten.view.default(mul_11, [1, 64]); mul_11 = None
            mm_6: "bf16[1, 16]" = torch.ops.aten.mm.default(view_20, t_6); view_20 = t_6 = None
            view_21: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_6, [1, 1, 16]); mm_6 = None
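
            # t_4 .. view_21 are the SwiGLU MLP from the line cited above:
            # down_proj(silu(gate_proj(x)) * up_proj(x)), 16 -> 64 -> 16. Eager sketch
            # (illustrative; _-names are not part of the graph):
            _xin = torch.randn(1, 1, 16, dtype=torch.bfloat16)
            _wg = torch.randn(64, 16, dtype=torch.bfloat16)            # gate_proj.weight stand-in
            _wu = torch.randn(64, 16, dtype=torch.bfloat16)            # up_proj.weight stand-in
            _wd = torch.randn(16, 64, dtype=torch.bfloat16)            # down_proj.weight stand-in
            _mlp = (torch.nn.functional.silu(_xin @ _wg.t()) * (_xin @ _wu.t())) @ _wd.t()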

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:353 in forward, code: hidden_states = residual + hidden_states
            add_6: "bf16[1, 1, 16]" = torch.ops.aten.add.Tensor(add_4, view_21); add_4 = view_21 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:71 in forward, code: hidden_states = hidden_states.to(torch.float32)
            _to_copy_12: "f32[1, 1, 16]" = torch.ops.aten._to_copy.default(add_6, dtype = torch.float32)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:72 in forward, code: variance = hidden_states.pow(2).mean(-1, keepdim=True)
            pow_3: "f32[1, 1, 16]" = torch.ops.aten.pow.Tensor_Scalar(_to_copy_12, 2)
            mean_2: "f32[1, 1, 1]" = torch.ops.aten.mean.dim(pow_3, [-1], True); pow_3 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:73 in forward, code: hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
            add_7: "f32[1, 1, 1]" = torch.ops.aten.add.Tensor(mean_2, 1e-06); mean_2 = None
            rsqrt_2: "f32[1, 1, 1]" = torch.ops.aten.rsqrt.default(add_7); add_7 = None
            mul_12: "f32[1, 1, 16]" = torch.ops.aten.mul.Tensor(_to_copy_12, rsqrt_2); _to_copy_12 = rsqrt_2 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:74 in forward, code: return self.weight * hidden_states.to(input_dtype)
            _to_copy_13: "bf16[1, 1, 16]" = torch.ops.aten._to_copy.default(mul_12, dtype = torch.bfloat16); mul_12 = None
            mul_13: "bf16[1, 1, 16]" = torch.ops.aten.mul.Tensor(p_model_model_layers_slice_none__2__none____modules__1___input_layernorm_weight, _to_copy_13); p_model_model_layers_slice_none__2__none____modules__1___input_layernorm_weight = _to_copy_13 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:270 in forward, code: query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
            t_7: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___self_attn_q_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___self_attn_q_proj_weight = None
            view_22: "bf16[1, 16]" = torch.ops.aten.view.default(mul_13, [1, 16])
            mm_7: "bf16[1, 16]" = torch.ops.aten.mm.default(view_22, t_7); view_22 = t_7 = None
            view_23: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_7, [1, 1, 16]); mm_7 = None
            view_24: "bf16[1, 1, 4, 4]" = torch.ops.aten.view.default(view_23, [1, 1, -1, 4]); view_23 = None
            transpose_5: "bf16[1, 4, 1, 4]" = torch.ops.aten.transpose.int(view_24, 1, 2); view_24 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:271 in forward, code: key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
            t_8: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___self_attn_k_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___self_attn_k_proj_weight = None
            view_25: "bf16[1, 16]" = torch.ops.aten.view.default(mul_13, [1, 16])
            mm_8: "bf16[1, 16]" = torch.ops.aten.mm.default(view_25, t_8); view_25 = t_8 = None
            view_26: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_8, [1, 1, 16]); mm_8 = None
            view_27: "bf16[1, 1, 4, 4]" = torch.ops.aten.view.default(view_26, [1, 1, -1, 4]); view_26 = None
            transpose_6: "bf16[1, 4, 1, 4]" = torch.ops.aten.transpose.int(view_27, 1, 2); view_27 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:272 in forward, code: value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
            t_9: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___self_attn_v_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___self_attn_v_proj_weight = None
            view_28: "bf16[1, 16]" = torch.ops.aten.view.default(mul_13, [1, 16]); mul_13 = None
            mm_9: "bf16[1, 16]" = torch.ops.aten.mm.default(view_28, t_9); view_28 = t_9 = None
            view_29: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_9, [1, 1, 16]); mm_9 = None
            view_30: "bf16[1, 1, 4, 4]" = torch.ops.aten.view.default(view_29, [1, 1, -1, 4]); view_29 = None
            transpose_7: "bf16[1, 4, 1, 4]" = torch.ops.aten.transpose.int(view_30, 1, 2); view_30 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:171 in apply_rotary_pos_emb, code: cos = cos.unsqueeze(unsqueeze_dim)
            unsqueeze_12: "bf16[1, 1, 1, 4]" = torch.ops.aten.unsqueeze.default(_to_copy_4, 1); _to_copy_4 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:172 in apply_rotary_pos_emb, code: sin = sin.unsqueeze(unsqueeze_dim)
            unsqueeze_13: "bf16[1, 1, 1, 4]" = torch.ops.aten.unsqueeze.default(_to_copy_5, 1); _to_copy_5 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:173 in apply_rotary_pos_emb, code: q_embed = (q * cos) + (rotate_half(q) * sin)
            mul_14: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(transpose_5, unsqueeze_12)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:146 in rotate_half, code: x1 = x[..., : x.shape[-1] // 2]
            slice_40: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_5, 3, 0, 2)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:147 in rotate_half, code: x2 = x[..., x.shape[-1] // 2 :]
            slice_41: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_5, 3, 2, 9223372036854775807); transpose_5 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:148 in rotate_half, code: return torch.cat((-x2, x1), dim=-1)
            neg_2: "bf16[1, 4, 1, 2]" = torch.ops.aten.neg.default(slice_41); slice_41 = None
            cat_3: "bf16[1, 4, 1, 4]" = torch.ops.aten.cat.default([neg_2, slice_40], -1); neg_2 = slice_40 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:173 in apply_rotary_pos_emb, code: q_embed = (q * cos) + (rotate_half(q) * sin)
            mul_15: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(cat_3, unsqueeze_13); cat_3 = None
            add_8: "bf16[1, 4, 1, 4]" = torch.ops.aten.add.Tensor(mul_14, mul_15); mul_14 = mul_15 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:174 in apply_rotary_pos_emb, code: k_embed = (k * cos) + (rotate_half(k) * sin)
            mul_16: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(transpose_6, unsqueeze_12); unsqueeze_12 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:146 in rotate_half, code: x1 = x[..., : x.shape[-1] // 2]
            slice_42: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_6, 3, 0, 2)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:147 in rotate_half, code: x2 = x[..., x.shape[-1] // 2 :]
            slice_43: "bf16[1, 4, 1, 2]" = torch.ops.aten.slice.Tensor(transpose_6, 3, 2, 9223372036854775807); transpose_6 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:148 in rotate_half, code: return torch.cat((-x2, x1), dim=-1)
            neg_3: "bf16[1, 4, 1, 2]" = torch.ops.aten.neg.default(slice_43); slice_43 = None
            cat_4: "bf16[1, 4, 1, 4]" = torch.ops.aten.cat.default([neg_3, slice_42], -1); neg_3 = slice_42 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:174 in apply_rotary_pos_emb, code: k_embed = (k * cos) + (rotate_half(k) * sin)
            mul_17: "bf16[1, 4, 1, 4]" = torch.ops.aten.mul.Tensor(cat_4, unsqueeze_13); cat_4 = unsqueeze_13 = None
            add_9: "bf16[1, 4, 1, 4]" = torch.ops.aten.add.Tensor(mul_16, mul_17); mul_16 = mul_17 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1182 in update, code: key_states = key_states.to(k_out.dtype)
            _to_copy_14: "bf16[1, 4, 1, 4]" = torch.ops.aten._to_copy.default(add_9, dtype = torch.bfloat16); add_9 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1183 in update, code: value_states = value_states.to(v_out.dtype)
            _to_copy_15: "bf16[1, 4, 1, 4]" = torch.ops.aten._to_copy.default(transpose_7, dtype = torch.bfloat16); transpose_7 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1193 in update, code: k_out.index_copy_(2, cache_position, key_states)
            index_copy_2: "bf16[1, 4, 1234, 4]" = torch.ops.aten.index_copy.default(b___static_cache_key_cache_1, 2, cache_position, _to_copy_14); b___static_cache_key_cache_1 = _to_copy_14 = None

            # File: /home/ilyas/transformers/src/transformers/cache_utils.py:1194 in update, code: v_out.index_copy_(2, cache_position, value_states)
            index_copy_3: "bf16[1, 4, 1234, 4]" = torch.ops.aten.index_copy.default(b___static_cache_value_cache_1, 2, cache_position, _to_copy_15); b___static_cache_value_cache_1 = cache_position = _to_copy_15 = None

            # File: /home/ilyas/transformers/src/transformers/integrations/sdpa_attention.py:48 in sdpa_attention_forward, code: attn_output = torch.nn.functional.scaled_dot_product_attention(
            slice_47: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_scatter_3, 0, 0, 9223372036854775807); slice_scatter_3 = None
            slice_48: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_47, 1, 0, 9223372036854775807); slice_47 = None
            slice_49: "bf16[1, 1, 1, 1234]" = torch.ops.aten.slice.Tensor(slice_48, 2, 0, 9223372036854775807); slice_48 = None
            _scaled_dot_product_flash_attention_for_cpu_1 = torch.ops.aten._scaled_dot_product_flash_attention_for_cpu.default(add_8, index_copy_2, index_copy_3, attn_mask = slice_49, scale = 0.5); add_8 = slice_49 = None
            getitem_2: "bf16[1, 4, 1, 4]" = _scaled_dot_product_flash_attention_for_cpu_1[0]; _scaled_dot_product_flash_attention_for_cpu_1 = None

            # File: /home/ilyas/transformers/src/transformers/integrations/sdpa_attention.py:57 in sdpa_attention_forward, code: attn_output = attn_output.transpose(1, 2).contiguous()
            transpose_8: "bf16[1, 1, 4, 4]" = torch.ops.aten.transpose.int(getitem_2, 1, 2); getitem_2 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:303 in forward, code: attn_output = attn_output.reshape(*input_shape, -1).contiguous()
            view_31: "bf16[1, 1, 16]" = torch.ops.aten.view.default(transpose_8, [1, 1, -1]); transpose_8 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:304 in forward, code: attn_output = self.o_proj(attn_output)
            t_10: "bf16[16, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___self_attn_o_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___self_attn_o_proj_weight = None
            view_32: "bf16[1, 16]" = torch.ops.aten.view.default(view_31, [1, 16]); view_31 = None
            mm_10: "bf16[1, 16]" = torch.ops.aten.mm.default(view_32, t_10); view_32 = t_10 = None
            view_33: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_10, [1, 1, 16]); mm_10 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:347 in forward, code: hidden_states = residual + hidden_states
            add_10: "bf16[1, 1, 16]" = torch.ops.aten.add.Tensor(add_6, view_33); add_6 = view_33 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:71 in forward, code: hidden_states = hidden_states.to(torch.float32)
            _to_copy_16: "f32[1, 1, 16]" = torch.ops.aten._to_copy.default(add_10, dtype = torch.float32)

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:72 in forward, code: variance = hidden_states.pow(2).mean(-1, keepdim=True)
            pow_4: "f32[1, 1, 16]" = torch.ops.aten.pow.Tensor_Scalar(_to_copy_16, 2)
            mean_3: "f32[1, 1, 1]" = torch.ops.aten.mean.dim(pow_4, [-1], True); pow_4 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:73 in forward, code: hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
            add_11: "f32[1, 1, 1]" = torch.ops.aten.add.Tensor(mean_3, 1e-06); mean_3 = None
            rsqrt_3: "f32[1, 1, 1]" = torch.ops.aten.rsqrt.default(add_11); add_11 = None
            mul_18: "f32[1, 1, 16]" = torch.ops.aten.mul.Tensor(_to_copy_16, rsqrt_3); _to_copy_16 = rsqrt_3 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:74 in forward, code: return self.weight * hidden_states.to(input_dtype)
            _to_copy_17: "bf16[1, 1, 16]" = torch.ops.aten._to_copy.default(mul_18, dtype = torch.bfloat16); mul_18 = None
            mul_19: "bf16[1, 1, 16]" = torch.ops.aten.mul.Tensor(p_model_model_layers_slice_none__2__none____modules__1___post_attention_layernorm_weight, _to_copy_17); p_model_model_layers_slice_none__2__none____modules__1___post_attention_layernorm_weight = _to_copy_17 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:190 in forward, code: down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
            t_11: "bf16[16, 64]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___mlp_gate_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___mlp_gate_proj_weight = None
            view_34: "bf16[1, 16]" = torch.ops.aten.view.default(mul_19, [1, 16])
            mm_11: "bf16[1, 64]" = torch.ops.aten.mm.default(view_34, t_11); view_34 = t_11 = None
            view_35: "bf16[1, 1, 64]" = torch.ops.aten.view.default(mm_11, [1, 1, 64]); mm_11 = None
            silu_1: "bf16[1, 1, 64]" = torch.ops.aten.silu.default(view_35); view_35 = None
            t_12: "bf16[16, 64]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___mlp_up_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___mlp_up_proj_weight = None
            view_36: "bf16[1, 16]" = torch.ops.aten.view.default(mul_19, [1, 16]); mul_19 = None
            mm_12: "bf16[1, 64]" = torch.ops.aten.mm.default(view_36, t_12); view_36 = t_12 = None
            view_37: "bf16[1, 1, 64]" = torch.ops.aten.view.default(mm_12, [1, 1, 64]); mm_12 = None
            mul_20: "bf16[1, 1, 64]" = torch.ops.aten.mul.Tensor(silu_1, view_37); silu_1 = view_37 = None
            t_13: "bf16[64, 16]" = torch.ops.aten.t.default(p_model_model_layers_slice_none__2__none____modules__1___mlp_down_proj_weight); p_model_model_layers_slice_none__2__none____modules__1___mlp_down_proj_weight = None
            view_38: "bf16[1, 64]" = torch.ops.aten.view.default(mul_20, [1, 64]); mul_20 = None
            mm_13: "bf16[1, 16]" = torch.ops.aten.mm.default(view_38, t_13); view_38 = t_13 = None
            view_39: "bf16[1, 1, 16]" = torch.ops.aten.view.default(mm_13, [1, 1, 16]); mm_13 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:353 in forward, code: hidden_states = residual + hidden_states
            add_12: "bf16[1, 1, 16]" = torch.ops.aten.add.Tensor(add_10, view_39); add_10 = view_39 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:71 in forward, code: hidden_states = hidden_states.to(torch.float32)
            _to_copy_18: "f32[1, 1, 16]" = torch.ops.aten._to_copy.default(add_12, dtype = torch.float32); add_12 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:72 in forward, code: variance = hidden_states.pow(2).mean(-1, keepdim=True)
            pow_5: "f32[1, 1, 16]" = torch.ops.aten.pow.Tensor_Scalar(_to_copy_18, 2)
            mean_4: "f32[1, 1, 1]" = torch.ops.aten.mean.dim(pow_5, [-1], True); pow_5 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:73 in forward, code: hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
            add_13: "f32[1, 1, 1]" = torch.ops.aten.add.Tensor(mean_4, 1e-06); mean_4 = None
            rsqrt_4: "f32[1, 1, 1]" = torch.ops.aten.rsqrt.default(add_13); add_13 = None
            mul_21: "f32[1, 1, 16]" = torch.ops.aten.mul.Tensor(_to_copy_18, rsqrt_4); _to_copy_18 = rsqrt_4 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:74 in forward, code: return self.weight * hidden_states.to(input_dtype)
            _to_copy_19: "bf16[1, 1, 16]" = torch.ops.aten._to_copy.default(mul_21, dtype = torch.bfloat16); mul_21 = None
            mul_22: "bf16[1, 1, 16]" = torch.ops.aten.mul.Tensor(p_model_model_norm_weight, _to_copy_19); p_model_model_norm_weight = _to_copy_19 = None

            # File: /home/ilyas/transformers/src/transformers/models/llama/modeling_llama.py:856 in forward, code: logits = self.lm_head(hidden_states[:, slice_indices, :])
            slice_50: "bf16[1, 1, 16]" = torch.ops.aten.slice.Tensor(mul_22, 0, 0, 9223372036854775807); mul_22 = None
            slice_51: "bf16[1, 1, 16]" = torch.ops.aten.slice.Tensor(slice_50, 1, 0, 9223372036854775807); slice_50 = None
            slice_52: "bf16[1, 1, 16]" = torch.ops.aten.slice.Tensor(slice_51, 2, 0, 9223372036854775807); slice_51 = None
            t_14: "bf16[16, 32000]" = torch.ops.aten.t.default(p_model_lm_head_weight); p_model_lm_head_weight = None
            view_40: "bf16[1, 16]" = torch.ops.aten.view.default(slice_52, [1, 16]); slice_52 = None
            mm_14: "bf16[1, 32000]" = torch.ops.aten.mm.default(view_40, t_14); view_40 = t_14 = None
            view_41: "bf16[1, 1, 32000]" = torch.ops.aten.view.default(mm_14, [1, 1, 32000]); mm_14 = None
            return (index_copy, index_copy_1, index_copy_2, index_copy_3, view_41)

Graph signature: ExportGraphSignature(
    input_specs=[
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___input_layernorm_weight'), target='model.model.layers.0.input_layernorm.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___post_attention_layernorm_weight'), target='model.model.layers.0.post_attention_layernorm.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___input_layernorm_weight'), target='model.model.layers.1.input_layernorm.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___post_attention_layernorm_weight'), target='model.model.layers.1.post_attention_layernorm.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_norm_weight'), target='model.model.norm.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_embed_tokens_weight'), target='model.model.embed_tokens.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___self_attn_q_proj_weight'), target='model.model.layers.0.self_attn.q_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___self_attn_k_proj_weight'), target='model.model.layers.0.self_attn.k_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___self_attn_v_proj_weight'), target='model.model.layers.0.self_attn.v_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___self_attn_o_proj_weight'), target='model.model.layers.0.self_attn.o_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___mlp_gate_proj_weight'), target='model.model.layers.0.mlp.gate_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___mlp_up_proj_weight'), target='model.model.layers.0.mlp.up_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__0___mlp_down_proj_weight'), target='model.model.layers.0.mlp.down_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___self_attn_q_proj_weight'), target='model.model.layers.1.self_attn.q_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___self_attn_k_proj_weight'), target='model.model.layers.1.self_attn.k_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___self_attn_v_proj_weight'), target='model.model.layers.1.self_attn.v_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___self_attn_o_proj_weight'), target='model.model.layers.1.self_attn.o_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___mlp_gate_proj_weight'), target='model.model.layers.1.mlp.gate_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___mlp_up_proj_weight'), target='model.model.layers.1.mlp.up_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_model_layers_slice_none__2__none____modules__1___mlp_down_proj_weight'), target='model.model.layers.1.mlp.down_proj.weight', persistent=None),
        InputSpec(kind=<InputKind.PARAMETER: 2>, arg=TensorArgument(name='p_model_lm_head_weight'), target='model.lm_head.weight', persistent=None),
        InputSpec(kind=<InputKind.BUFFER: 3>, arg=TensorArgument(name='b_mask'), target='mask', persistent=False),
        InputSpec(kind=<InputKind.BUFFER: 3>, arg=TensorArgument(name='b___static_cache_key_cache_0'), target='key_cache_0', persistent=False),
        InputSpec(kind=<InputKind.BUFFER: 3>, arg=TensorArgument(name='b_model_model_rotary_emb_inv_freq'), target='model.model.rotary_emb.inv_freq', persistent=False),
        InputSpec(kind=<InputKind.BUFFER: 3>, arg=TensorArgument(name='b___static_cache_value_cache_0'), target='value_cache_0', persistent=False),
        InputSpec(kind=<InputKind.BUFFER: 3>, arg=TensorArgument(name='b___static_cache_key_cache_1'), target='key_cache_1', persistent=False),
        InputSpec(kind=<InputKind.BUFFER: 3>, arg=TensorArgument(name='b___static_cache_value_cache_1'), target='value_cache_1', persistent=False),
        InputSpec(kind=<InputKind.USER_INPUT: 1>, arg=TensorArgument(name='input_ids'), target=None, persistent=None),
        InputSpec(kind=<InputKind.USER_INPUT: 1>, arg=TensorArgument(name='cache_position'), target=None, persistent=None)
    ],
    output_specs=[
        OutputSpec(kind=<OutputKind.BUFFER_MUTATION: 3>, arg=TensorArgument(name='index_copy'), target='key_cache_0'),
        OutputSpec(kind=<OutputKind.BUFFER_MUTATION: 3>, arg=TensorArgument(name='index_copy_1'), target='value_cache_0'),
        OutputSpec(kind=<OutputKind.BUFFER_MUTATION: 3>, arg=TensorArgument(name='index_copy_2'), target='key_cache_1'),
        OutputSpec(kind=<OutputKind.BUFFER_MUTATION: 3>, arg=TensorArgument(name='index_copy_3'), target='value_cache_1'),
        OutputSpec(kind=<OutputKind.USER_OUTPUT: 1>, arg=TensorArgument(name='view_41'), target=None)
    ]
)
Range constraints: {}
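
# A program like the one above comes from tracing a Hugging Face model wrapped for
# static-cache export (the transformers/integrations/executorch.py forward seen in the
# mask ops) through torch.export. A minimal repro sketch, assuming transformers'
# convert_and_export_with_cache helper and a toy bf16 LlamaForCausalLM sized to match
# the shapes in the graph above:
import torch
from transformers import LlamaConfig, LlamaForCausalLM
from transformers.integrations.executorch import convert_and_export_with_cache

config = LlamaConfig(hidden_size=16, intermediate_size=64, num_hidden_layers=2,
                     num_attention_heads=4, num_key_value_heads=4, vocab_size=32000)
model = LlamaForCausalLM(config).to(torch.bfloat16).eval()
exported = convert_and_export_with_cache(model)  # returns a torch.export.ExportedProgram
print(exported)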