Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ======================== INT ========================
- //
- // Generated by NVIDIA LLVM Compiler 4.0
- //
- .version 2.2 // PTX ISA version this module is written against
- .target sm_21, texmode_independent // built for sm_21; textures and samplers are handled independently
- .entry kern( // INT listing: each thread adds one integer constant to a single 32-bit element of the buffer
- .param .u32 .ptr .global .align 4 kern_param_0 // 32-bit pointer into .global space, 4-byte aligned: base of the buffer
- )
- {
- .reg .s32 %r<12>; // declares %r0..%r11
- _kern: // label at the start of the kernel body
- mov.u32 %r1, %tid.x; // thread index within the CTA (x)
- mov.u32 %r2, %envreg3; // driver-defined environment register; presumably the OpenCL global work offset in x -- TODO confirm
- add.s32 %r3, %r1, %r2; // %r3 = tid.x + envreg3
- mov.u32 %r4, %ctaid.x; // CTA index within the grid (x)
- mov.u32 %r5, %ntid.x; // number of threads per CTA (x)
- mad.lo.s32 %r6, %r4, %r5, %r3; // element index = ctaid.x * ntid.x + %r3 (low 32 bits of the product)
- shl.b32 %r7, %r6, 2; // byte offset = index * 4 (4-byte elements)
- ld.param.u32 %r8, [kern_param_0]; // fetch the buffer base address
- add.s32 %r9, %r8, %r7; // %r9 = address of this thread's element
- ld.global.u32 %r10, [%r9]; // read the current 32-bit value
- add.s32 %r11, %r10, 1784293664; // 1784293664 == (0 + 1 + ... + 1000000) mod 2^32; presumably the 0..1000000 summation loop (still explicit in the FLOAT listing) folded to a constant by the compiler -- verify against the kernel source
- st.global.u32 [%r9], %r11; // write the updated value back to the same element
- ret;
- }
- ======================== FLOAT ========================
- //
- // Generated by NVIDIA LLVM Compiler 4.0
- //
- .version 2.2 // PTX ISA version this module is written against
- .target sm_21, texmode_independent // built for sm_21; textures and samplers are handled independently
- .entry kern( // FLOAT listing: each thread accumulates float(i) for i = 0..1000000 onto a single f32 element of the buffer
- .param .u32 .ptr .global .align 4 kern_param_0 // 32-bit pointer into .global space, 4-byte aligned: base of the buffer
- )
- {
- .reg .f32 %f<6>; // declares %f0..%f5 (only %f4 and %f5 are used below)
- .reg .pred %p<2>; // declares %p0..%p1 (only %p1 is used below)
- .reg .s32 %r<14>; // declares %r0..%r13
- _kern: // label at the start of the kernel body
- mov.u32 %r5, %tid.x; // thread index within the CTA (x)
- mov.u32 %r6, %envreg3; // driver-defined environment register; presumably the OpenCL global work offset in x -- TODO confirm
- add.s32 %r7, %r5, %r6; // %r7 = tid.x + envreg3
- mov.u32 %r8, %ctaid.x; // CTA index within the grid (x)
- mov.u32 %r9, %ntid.x; // number of threads per CTA (x)
- mad.lo.s32 %r10, %r8, %r9, %r7; // element index = ctaid.x * ntid.x + %r7 (low 32 bits of the product)
- shl.b32 %r11, %r10, 2; // byte offset = index * 4 (4-byte elements)
- ld.param.u32 %r12, [kern_param_0]; // fetch the buffer base address
- add.s32 %r1, %r12, %r11; // %r1 = address of this thread's element (reused by the final store)
- ld.global.f32 %f5, [%r1]; // %f5 = running sum, seeded with the element's current value
- mov.u32 %r13, 0; // loop counter starts at 0
- BB1_1: // %._crit_edge -- loop head; %r13 carries the counter into each iteration
- mov.u32 %r2, %r13; // %r2 = i, the counter value for this iteration
- add.s32 %r3, %r2, 1; // %r3 = i + 1
- cvt.rn.f32.s32 %f4, %r2; // %f4 = (float)i, rounded to nearest even
- add.rn.f32 %f5, %f5, %f4; // sum += (float)i, one rounded add per iteration so the accumulation order is fixed
- setp.ne.s32 %p1, %r3, 1000001; // %p1 = (i + 1 != 1000001): true while more iterations remain
- mov.u32 %r13, %r3; // hand i + 1 to the next iteration
- @%p1 bra BB1_1; // repeat until i + 1 reaches 1000001, i.e. the body runs for i = 0..1000000
- BB1_2: // loop exit
- st.global.f32 [%r1], %f5; // write the accumulated sum back to the same element
- ret;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement