Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Module with 1 kernels, global data with 0 words (64-bit each), starting at offset 1 words and 0 words of stack-frame
- // Kernel 'sum_f16', offset 2, with following parameters: __global float* a (4 B, 1 items), __global float* b (4 B, 1 items), __global float* c (4 B, 1 items)
- or -, unif, unif  // Kernel 'sum_f16' entry. Uniform #1 is read and discarded. The block below consumes 16 uniforms in order; NOTE(review): naming of the implicit ones (work dim, local sizes/ids, group ids, offsets...) follows the usual VC4CL layout and is an assumption - confirm against the compiler version used.
- or r1, unif, unif  // r1 = uniform #2 (presumably packed local sizes - TODO confirm)
- or ra4, unif, unif  // ra4 = uniform #3 (presumably packed local ids - TODO confirm)
- or -, unif, unif  // uniform #4 discarded
- or -, unif, unif  // uniform #5 discarded
- or -, unif, unif  // uniform #6 discarded
- or r0, unif, unif  // r0 = uniform #7 (presumably group id X - TODO confirm)
- or -, unif, unif  // uniform #8 discarded
- or -, unif, unif  // uniform #9 discarded
- or r2, unif, unif  // r2 = uniform #10 (presumably global offset X - TODO confirm)
- or -, unif, unif  // uniform #11 discarded
- or -, unif, unif  // uniform #12 discarded
- or -, unif, unif  // uniform #13 discarded
- or ra0, unif, unif  // ra0 = uniform #14: base pointer later used for the DMA store
- or ra2, unif, unif  // ra2 = uniform #15: base pointer used for the first DMA load
- or ra1, unif, unif  // ra1 = uniform #16: base pointer used for the second DMA load
- or ra3, r0, r0; v8min r0, 0 (0), 0 (0)  // paired ops: ra3 = copy of uniform #7, while the second op sets r0 = min(0, 0) = 0
- shr r0, r1, r0  // r0 = r1 >> 0, i.e. uniform #2 unshifted (shift amount is the zero just written)
- ldi r1, 255  // r1 = 0xFF byte mask (uniform #2 now lives in r0)
- and r3, r0, r1  // r3 = low byte of uniform #2 (presumably local size X)
- or r2, r2, r2; v8min r0, 0 (0), 0 (0)  // r2 self-move (value unchanged); r0 = 0 again
- shr r0, ra4, r0  // r0 = ra4 >> 0, i.e. uniform #3 unshifted
- and r1, r0, r1  // r1 = low byte of uniform #3 (presumably local id X)
- mul24 r0, ra3, r3  // r0 = ra3 * r3 (24-bit multiply)
- add r0, r2, r0  // r0 += r2 (uniform #10)
- add r0, r0, r1  // r0 += r1 -> per-work-item index; presumably global id = group_id * local_size + global_offset + local_id
- shl r1, r0, 4 (4)  // r1 = index * 16; kept in r1 and reused for all three addresses
- nop.never  // no operation
- or r0, r1, r1  // r0 = index * 16
- shl r0, r0, 2 (2)  // r0 = index * 64: byte offset (16 elements x 4 bytes, assuming 4-byte elements as in the kernel signature)
- add r0, ra2, r0  // r0 = ra2 + byte offset: source address of the first load
- or r0, r0, r0  // self-move, value unchanged
- or -, mutex_acq, mutex_acq  // read mutex_acq: presumably blocks until the hardware mutex guarding VPM/DMA access is held
- ldi vpr_setup, 2147553280  // 0x80011000: first read-side setup word; presumably DMA load of 1 row of 16 32-bit words to VPM address 0 - verify field decode against the VideoCore IV guide
- ldi vpr_setup, 2415919168  // 0x90000040: second read-side setup word; presumably memory pitch of 64 bytes - verify
- or vpr_addr, r0, r0  // write the source address; presumably this starts the DMA load
- or r0, r1, r1  // r0 = index * 16 (address math for the next accesses, issued while the load is in flight)
- or r1, r1, r1  // self-move, value unchanged
- shl r0, r0, 2 (2)  // r0 = index * 64 (byte offset)
- shl r1, r1, 2 (2)  // r1 = index * 64 (byte offset)
- add r0, ra1, r0  // r0 = ra1 + byte offset: source address of the second load
- add r1, ra0, r1  // r1 = ra0 + byte offset: destination address of the final store
- or -, vpr_wait, vpr_wait  // read vpr_wait: presumably stalls until the DMA load has completed
- ldi vpr_setup, 1055232  // 0x00101A00: presumably VPM read setup for one horizontal 32-bit row at VPM address 0 - verify
- or r3, vpm, vpm  // r3 = first operand, read from VPM
- or mutex_rel, 1 (1), 1 (1)  // write mutex_rel: release the mutex taken above
- or r0, r0, r0  // self-move, value unchanged
- or -, mutex_acq, mutex_acq  // re-acquire the mutex for the second load
- ldi vpr_setup, 2147553280  // 0x80011000: same read-side setup word as for the first load
- ldi vpr_setup, 2415919168  // 0x90000040: same pitch setup word as for the first load
- or vpr_addr, r0, r0  // write the second source address (ra1-based)
- or ra0, r1, r1  // ra0 = store address; overwrites the uniform #14 pointer, which is already folded into r1
- or -, vpr_wait, vpr_wait  // wait for the second load
- ldi vpr_setup, 1055232  // 0x00101A00: same VPM read setup as above
- or r2, vpm, vpm  // r2 = second operand, read from VPM
- or mutex_rel, 1 (1), 1 (1)  // release the mutex
- fmul r1, r3, r2  // r1 = r3 * r2 (float multiply)
- fmul r0, r3, r2  // r0 = r3 * r2 (same operands as the previous fmul)
- fadd r1, r0, r1  // r1 = r0 + r1 = two copies of the product
- fmul r0, r3, r2  // r0 = r3 * r2 (same operands a third time)
- fadd r0, r0, r1  // r0 = product + (product + product). NOTE(review): all three fmuls use identical operands, so the result is 3x one product - confirm this matches the intended kernel source
- or -, mutex_acq, mutex_acq  // acquire the mutex for the write-back
- ldi vpw_setup, 6656  // 0x00001A00: presumably VPM write setup, horizontal 32-bit access at VPM address 0 - verify
- or vpm, r0, r0  // write the result in r0 into VPM
- ldi vpw_setup, 2156937216  // 0x80904000: presumably DMA store setup for 1 row of 16 32-bit words - verify
- ldi vpw_setup, 3221291008  // 0xC0010000: presumably DMA store stride setup - verify
- or vpw_addr, ra0, ra0  // write the destination address held in ra0; presumably this starts the DMA store
- or -, vpw_wait, vpw_wait  // read vpw_wait: presumably stalls until the DMA store has completed
- or mutex_rel, 1 (1), 1 (1)  // release the mutex
- or r0, unif, unif  // r0 = uniform #17, read after the three pointers (presumably VC4CL's "run another iteration" flag - TODO confirm)
- or.setf -, elem_num, r0  // set condition flags from (elem_num | r0); result itself is discarded
- brr.ifallzc (pc+4) + -75  // relative branch taken when the zero flag is clear on all elements (presumably means r0 != 0, since elem_num is 0 in lane 0). If the offset counts instructions, target = index 71 + 4 - 75 = 0, i.e. the first uniform read of the kernel
- nop.never  // no-op (presumably branch delay slot 1 of 3)
- nop.never  // no-op (presumably branch delay slot 2 of 3)
- nop.never  // no-op (presumably branch delay slot 3 of 3)
- not irq, qpu_num  // write ~qpu_num to the irq register; presumably signals the host that this QPU has finished
- nop.thrend.never  // no-op carrying the thread-end signal
- nop.never  // trailing no-op after thrend (presumably a required delay slot)
- nop.never  // trailing no-op after thrend (presumably a required delay slot)
Add Comment
Please, Sign In to add comment