Advertisement
Guest User

Untitled

a guest
Jul 30th, 2014
208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.53 KB | None | 0 0
  1. PH3NOM
  2.  
  3. Post subject: Re: 3D Z Clipping For the PVR + OpenGL
  4.  
  5. PostPosted: Sun Mar 09, 2014 7:12 pm
  6.  
  7.  
  8.  
  9. Offline
  10. DC Developer
  11. DC Developer
  12. User avatar
  13.  
  14. Joined: Fri Jun 18, 2010 7:29 pm
  15. Posts: 359
  16.  
  17.  
  18. moribus wrote:
  19.  
  20. Any news about this project ? :)
  21.  
  22.  
  23. Thanks for the interest guys!
  24.  
  25. Yes, I am still at work on the finishing touches.
  26.  
  27. I decided to re-write some of the lighting code in pure SH4 assembly to get some speed gains.
  28. Good news: the lighting code now delivers more than twice the throughput of the previous version, which was written in C with inline asm.
  29.  
  30.  
  31.  
  32. Spoiler: hide
  33.  
  34. sh4_light.S
  35. Code:
  36.  
  37. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  38. ! SH4 Assembly Light Code (C) 2014 Josh PH3NOM Pearson
  39. ! Computes Diffuse and Attenuation Factors
  40. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  41.  
  42. .globl _sh4_light_3f
  43.  
  44. !void sh4_light_3f( void *glLight, void *vertex6f, void *out )
  45. !fr0 = vertex x = dr0 = fv0
  46. !fr1 = vertex y
  47. !fr2 = vertex z = dr2
  48. !fr3 = vertex w
  49. !fr4 = light x = dr4 = fv4
  50. !fr5 = light y
  51. !fr6 = light z = dr6
  52. !fr7 = light w
  53. !fr8 = normal x = dr8 = fv8
  54. !fr9 = normal y
  55. !fr10 = normal z = dr10
  56. !fr11 = normal w
  57. !fr12 = misc x = dr12 = fv12
  58. !fr13 = misc y
  59. !fr14 = misc z = dr14
  60. !fr15 = misc w
  61. !r0 = unused
  62. !r1 = flag: set to 1 once the spot light direction / cutoff have been read from glLight
  63. !r2 = constant 0, compared against r1
  64. !r3 = unused
  65. !r4 = [arg][void*] glLight Structure
  66. !r5 = [arg][void*] float3 Vertex Position / Normal
  67. !r6 = [arg][void*] float3 Output Write Address FOR D, A light factors
  68.  
  69. _sh4_light_3f:
  70. ! Writes two floats through r6: the diffuse factor (N dot L), then the attenuation
  71. ! factor 1 / (Kc + Kl*d + Kq*d*d). Both are written as 0.0 when the vertex is unlit.
  72. ! NOTE(review): fr12-fr15 are overwritten and not restored - confirm callers allow it.
  73. mov #0, r1 ! r1 = 0: spot direction / cutoff not yet read from glLight
  74. fmov @r5+, fr0 ! fv0 = vertex position x, y, z
  75. fmov @r5+, fr1 !
  76. fmov @r5+, fr2 !
  77. fldi0 fr3 ! vertex w = 0; fr3 doubles as the 0.0 operand of the length checks
  78. fldi0 fr15 ! fr15 = 0.0 (it was compared below without ever being initialised)
  79.  
  80. fmov @r4+, fr4 ! fv4 = light position x, y, z, w
  81. fmov @r4+, fr5 !
  82. fmov @r4+, fr6 !
  83. fmov @r4+, fr7 !
  84.  
  85. fcmp/gt fr15, fr7 ! T = (light w > 0): w above 0 marks a spot light
  86. bf .LIGHT1s ! not a spot light: skip the cone test
  87.  
  88. .SPOTL1s: ! Spotlight Calculations
  89. fmov fr0, fr8 ! fv8 = copy of vertex position
  90. fmov fr1, fr9 !
  91. fmov fr2, fr10 !
  92. fldi0 fr11 ! w = 0 so fipr sums x, y, z only
  93.  
  94. fsub fr4, fr8 ! fv8 = vertex position - light position
  95. fsub fr5, fr9 !
  96. fsub fr6, fr10 !
  97.  
  98. fipr fv8, fv8 ! fr11 = squared length of fv8
  99. fsqrt fr11 ! fr11 = length of fv8
  100.  
  101. fcmp/gt fr3, fr11 ! T = (length > 0)
  102. bf .Csl ! zero length: skip the divide
  103.  
  104. fdiv fr11, fr8 ! normalize fv8
  105. fdiv fr11, fr9
  106. fdiv fr11, fr10
  107.  
  108. .Csl:
  109. mov #1, r1 ! flag: spot direction and cutoff have now been read from glLight
  110. fmov @r4+, fr12 ! fv12 = spot light direction x, y, z
  111. fmov @r4+, fr13 !
  112. fmov @r4+, fr14 !
  113. fldi0 fr11 ! clear both w components so they add nothing to the dot product
  114. fldi0 fr15
  115. fipr fv12, fv8 ! fr11 = cosDir = spot direction dot normalized fv8
  116. fmov @r4+, fr15 ! fr15 = spot light cutoff
  117.  
  118. fcmp/gt fr15, fr11 ! T = (cosDir > cutoff)
  119. bt .RET0s ! cosDir > cutoff: vertex gets no light (bf here did the opposite) - NOTE(review): confirm cutoff encoding
  120.  
  121. .LIGHT1s:
  122. fsub fr0, fr4 ! fv4 = light position - vertex position
  123. fsub fr1, fr5
  124. fsub fr2, fr6
  125. fldi0 fr7 ! w = 0 so fipr sums x, y, z only
  126. fipr fv4, fv4 ! fr7 = squared length
  127. fsqrt fr7 ! fr7 = d, the distance from vertex to light
  128.  
  129. fcmp/gt fr3, fr7 ! T = (d > 0)
  130. bf .Cl ! zero length: skip the divide
  131.  
  132. fdiv fr7, fr4 ! fv4 = L = normalize(light pos - vertex pos)
  133. fdiv fr7, fr5
  134. fdiv fr7, fr6
  135.  
  136. .Cl:
  137. fmov @r5+, fr8 ! fv8 = N = vertex normal
  138. fmov @r5+, fr9
  139. fmov @r5, fr10
  140. fldi0 fr11 ! N w = 0; fr11 is also the 0.0 for the diffuse check below
  141.  
  142. fmov fr7, fr3 ! fr3 = d, kept for the attenuation math
  143. fldi0 fr7 ! L w = 0
  144.  
  145. fipr fv8, fv4 ! fr7 = N dot L = diffuse magnitude
  146.  
  147. fcmp/gt fr11, fr7 ! T = (diffuse > 0); this used to compare against d in fr3
  148. bf .RET0s ! diffuse <= 0: vertex gets no light
  149.  
  150. mov #0, r2 ! Compute Attenuation Factor
  151. cmp/gt r2, r1 ! T = (r1 > 0): the spot block was already read
  152. bt .READATT0
  153.  
  154. .READATT1:
  155. add #16, r4 ! skip the unread spot direction x, y, z and cutoff (4 floats)
  156.  
  157. .READATT0:
  158. fmov @r4+, fr12 ! load Kc light attenuation factor
  159. fmov @r4+, fr13 ! load Kl light attenuation factor
  160. fmov @r4+, fr14 ! load Kq light attenuation factor
  161. ! the light exponent that follows Kq is not implemented, so it is no longer loaded
  162.  
  163. ! fr13 = Attenuation = 1.0f / (light->Kc + light->Kl * d + light->Kq * d * d);
  164. fmul fr3, fr13 ! light->Kl * d
  165. fmul fr3, fr14 ! light->Kq * d * d
  166. fmul fr3, fr14
  167. fadd fr13, fr12
  168. fadd fr14, fr12
  169. fldi1 fr13
  170. fdiv fr12, fr13 ! fr13 = 1.0 / fr12, finishes the attenuation calculation
  171.  
  172. fmov fr7, @r6 ! write D(Diffuse) factor; fr7 is no longer overwritten with d
  173. add #4, r6 ! advance to the second output float
  174. fmov fr13, @r6 ! write A(Attenuation) factor to output
  175.  
  176. rts
  177. nop
  178.  
  179. .RET0s: ! vertex receives no light: write 0.0 for both factors
  180. fldi0 fr7
  181. fmov fr7, @r6
  182. add #4, r6
  183. fmov fr7, @r6
  184.  
  185. rts
  186. nop
  187.  
  188. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  189.  
  190.  
  191. EDIT:
  192. As that was the first assembly piece I've ever written, I have realized some problems with that code.
  193. I have since corrected the branching errors, and optimized a few things.
  194. Here is my current code that is producing correct results while being much faster than the C/inline asm implementation:
  195.  
  196.  
  197. Spoiler: hide
  198.  
  199.  
  200. Code:
  201.  
  202. !int sh4_l3f( void * glLight, void * vertex6f, void * out );
  203. !fv0 = vertex position
  204. !fv4 = light position
  205. !fv8 = spotlight,normal
  206. !fv12 = misc
  207. !r0 = return value
  208. !r1 = boolean flag r1
  209. !r4 = [arg][void*] glLight Structure
  210. !r5 = [arg][void*] float6f Vertex Position / Normal
  211. !r6 = [arg][void*] float3 Output Write Address FOR D, A light factors
  212.  
  213. .globl _sh4_l3f
  214.  
  215. _sh4_l3f:
  216.  
  217. mov #0, r0 ! r0 = 0: return value, also the zero operand for the r1 test below
  218. mov #0, r1 ! r1 = 0: spot direction / cutoff not yet read from glLight
  219.  
  220. fschg ! toggle FPSCR.SZ: fmov now moves register pairs (this is transfer size, not precision)
  221.  
  222. fmov @r5+, dr0 ! load vertex x,y position into fr0, fr1 - NOTE(review): pair moves assume 8-byte aligned r4/r5
  223. fmov @r4+, dr4 ! load light position x,y,z,w into fv4
  224. fmov @r4+, dr6
  225.  
  226. fschg ! toggle FPSCR.SZ back to single 32-bit moves
  227.  
  228. fmov @r5+, fr2 ! load vertex z position to fr2
  229. fldi0 fr15 ! load 0 to fr15 to use for fcmp/gt 0
  230.  
  231.  
  232. fcmp/gt fr15, fr7 ! T = (light w > 0) - w above 0 marks a gl spot light
  233. bf .LIGHT1 ! NOTE(review): fr12-fr15 are overwritten and not restored - confirm callers allow it
  234.  
  235. .SPOTLIGHT1: ! Handle Spot Light Calculations
  236.  
  237. fschg ! pair-move mode on
  238.  
  239. fmov dr0, dr8 ! copy vertex position to fv8 to hold normalized P->L vector
  240. fmov dr2, dr10 ! copies fr2, fr3 into fr10, fr11; fr3 has not been set yet
  241.  
  242. fschg ! pair-move mode off
  243.  
  244. fsub fr4, fr8 ! sub3f light position from vertex position for P->L vector
  245. fsub fr5, fr9
  246. fsub fr6, fr10
  247. fldi0 fr11 ! clear w: the stale fr3 copy would otherwise be added into the length
  248. fipr fv8, fv8 ! fr11 = squared length of P->L
  249. fsqrt fr11 ! fr11 = length of P->L
  250.  
  251. fcmp/gt fr15, fr11 ! T = (length > 0)
  252. bf .SPOT1 ! length is not above 0 - skip division
  253.  
  254. fdiv fr11, fr8 ! div3f for P->L normalization
  255. fdiv fr11, fr9
  256. fdiv fr11, fr10
  257.  
  258. .SPOT1: ! branch target when the P->L division is skipped
  259.  
  260. mov #1, r1 ! set boolean flag r1 to 1 - spot direction / cutoff are being read from glLight
  261. fmov @r4+, fr12 ! load spot light direction
  262. fmov @r4+, fr13
  263. fmov @r4+, fr14
  264. fldi0 fr11 ! load 0 for P->L vector w component (fr15 is still 0 from above)
  265.  
  266. fipr fv12, fv8 ! P->L dot Light Dir || fr11 now holds light cosDir
  267.  
  268. fmov @r4+, fr15 ! load light cutOff to fr15
  269.  
  270. fcmp/gt fr15, fr11 ! T = (cosDir > cutOff); this used to test fr8, the x component of P->L
  271. bt .RETURN0 ! cosDir > cutOff: outside of spot light, return 0 - NOTE(review): confirm cutOff encoding
  272.  
  273. .LIGHT1: ! process vertex lighting
  274.  
  275. fsub fr0, fr4 ! transform Light position into L vector ( normalize(Lp-Vp) )
  276. fsub fr1, fr5
  277. fsub fr2, fr6
  278. fldi0 fr7 ! load 0 for L vector w component
  279. fldi0 fr15 ! load 0 to fr15 for fcmp/gt 0 comparison (it may hold the cutOff here)
  280.  
  281. fipr fv4, fv4 ! fr7 = squared length of L
  282. fsqrt fr7 ! fr7 = d, the vertex-to-light distance
  283.  
  284. fmov fr7, fr3 ! copy L vector length to fr3 for the attenuation math
  285.  
  286. fcmp/gt fr15, fr7 ! T = (d > 0)
  287. bf .LIGHTN ! length is not above 0 - skip division
  288.  
  289. fdiv fr7, fr4 ! div3f for L normalization
  290. fdiv fr7, fr5
  291. fdiv fr7, fr6
  292.  
  293. .LIGHTN: ! branch past L w division - load normal
  294.  
  295. fmov @r5+, fr8 ! load vertex normal to fv8
  296. fmov @r5+, fr9
  297. fmov @r5+, fr10
  298. fldi0 fr11 ! load 0 for vertex normal w component
  299. fldi0 fr7 ! load 0 for L w component
  300.  
  301. fipr fv8, fv4 ! N dot L || fr7 now holds Diffuse Magnitude
  302.  
  303. fcmp/gt fr15, fr7 ! T = (Diffuse Mag > 0)
  304. bf .RETURN0 ! Diffuse Mag <= 0 - Return 0
  305.  
  306. fmov fr7, @r6 ! write Diffuse Mag to output
  307. add #4, r6 ! advance to the second output float
  308.  
  309. cmp/gt r0, r1 ! T = (r1 > 0): spot path already read direction and cutOff
  310. bt .READATTEN1
  311.  
  312. add #16, r4 ! if light is not spot light, skip the 4 spot floats
  313.  
  314. .READATTEN1: ! compute attenuation factors
  315.  
  316. fmov @r4+, fr12 ! Kc load light attenuation factors to fv12
  317. fmov @r4+, fr13 ! Kl
  318. fmov @r4+, fr14 ! Kq
  319. fldi1 fr15 ! load 1 to fr15
  320.  
  321. fmul fr3, fr13 ! Kl * d
  322. fmul fr3, fr14 ! Kq * d
  323. fmul fr3, fr14 ! Kq * d * d
  324.  
  325. fadd fr13, fr12 ! fr12 = Kc + Kl * d
  326. fadd fr14, fr12 ! fr12 = Kc + Kl * d + Kq * d * d
  327.  
  328. fdiv fr12, fr15 ! fr15 = 1.0f / ( Kc + Kl * d + Kq * d * d )
  329.  
  330. fmov fr15, @r6 ! Write Attenuation Factor to output
  331.  
  332. .RETURN1: ! vertex receives light, diffuse and attenuation are written to output
  333.  
  334. mov #1, r0
  335. rts
  336. nop
  337.  
  338. .RETURN0: ! vertex receives no light: write 0.0 for both factors, return 0
  339. fldi0 fr3
  340. fmov fr3, @r6
  341. add #4, r6
  342. fmov fr3, @r6
  343.  
  344. mov #0, r0
  345. rts
  346. nop
  347.  
  348. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement