/* alignment-independent load */
/* Shared contract for ld_32, ld_16 and ld_08:
 * IN:  $r10: addr
 * OUT: $r10: val = D[addr]
 * $r11 is used as scratch and is overwritten.
 */
/* ld_32: read a 32-bit value from data space one byte at a time, so the
 * address in $r10 does not have to be aligned.
 *
 * The bytes are fetched from addr+3 down to addr+0 and accumulated in $r11,
 * shifting left by 8 between loads, so the byte at addr+0 ends up in the
 * least significant position (little-endian assembly).
 *
 * NOTE(review): this relies on `ld b8` replacing only the low byte of $r11
 * and leaving the upper bits intact - confirm against the ISA reference.
 * Under that assumption no initial clear is needed: whatever $r11 held on
 * entry is pushed out by the three 8-bit shifts.
 *
 * Result is returned in $r10; $r11 is left holding the same value.
 */
ld_32:
/* byte 3 (most significant) */
ld b8 $r11 D[$r10 + 3]
shl b32 $r11 8
/* byte 2 */
ld b8 $r11 D[$r10 + 2]
shl b32 $r11 8
/* byte 1 */
ld b8 $r11 D[$r10 + 1]
shl b32 $r11 8
/* byte 0 (least significant) */
ld b8 $r11 D[$r10 + 0]
/* hand the assembled value back in $r10, overwriting the address */
mov b32 $r10 $r11
ret
/* ld_16: read a 16-bit value from data space one byte at a time, so the
 * address in $r10 does not have to be aligned.
 *
 * $r11 is cleared first so the bits above the 16-bit result are zero, then
 * the byte at addr+1 is loaded and shifted into the high half of the low
 * 16 bits, and the byte at addr+0 fills the low byte (little-endian
 * assembly).
 *
 * NOTE(review): this relies on `ld b8` and `shl b16` leaving the bits of
 * $r11 outside their operand size untouched - confirm against the ISA
 * reference.
 *
 * Result is returned in $r10; $r11 is left holding the same value.
 */
ld_16:
/* start from zero: only the low 16 bits are written below */
clear b32 $r11
/* high byte */
ld b8 $r11 D[$r10 + 1]
shl b16 $r11 8
/* low byte */
ld b8 $r11 D[$r10 + 0]
/* hand the assembled value back in $r10, overwriting the address */
mov b32 $r10 $r11
ret
/* ld_08: read a single byte from data space at the address in $r10.
 *
 * $r11 is cleared before the byte load so the value handed back in $r10
 * has nothing but the loaded byte in it.
 *
 * NOTE(review): the explicit clear suggests `ld b8` does not zero the
 * upper bits of its destination by itself - confirm against the ISA
 * reference.
 *
 * Result is returned in $r10; $r11 is left holding the same value.
 */
ld_08:
/* start from zero: only the low byte is written below */
clear b32 $r11
ld b8 $r11 D[$r10 + 0]
/* hand the value back in $r10, overwriting the address */
mov b32 $r10 $r11
ret