Advertisement
Guest User

Dark Shikari

a guest
Sep 12th, 2008
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.42 KB | None | 0 0
  1. Index: encoder/macroblock.c
  2. ===================================================================
  3. --- encoder/macroblock.c (revision 736)
  4. +++ encoder/macroblock.c (working copy)
  5. @@ -96,7 +96,10 @@
  6. h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
  7.  
  8. if( h->mb.b_trellis )
  9. + {
  10. x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
  11. + x264_quant_4x4_qns( h, dct4x4, i_qscale, idx, 1, DCT_LUMA_4x4 );
  12. + }
  13. else
  14. h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
  15.  
  16. @@ -118,7 +121,10 @@
  17. h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
  18.  
  19. if( h->mb.b_trellis )
  20. + {
  21. x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
  22. + x264_quant_8x8_qns( h, dct8x8, i_qscale, idx, 1 );
  23. + }
  24. else
  25. h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8IY][i_qscale], h->quant8_bias[CQM_8IY][i_qscale] );
  26.  
  27. @@ -158,7 +164,10 @@
  28.  
  29. /* quant/scan/dequant */
  30. if( h->mb.b_trellis )
  31. + {
  32. x264_quant_4x4_trellis( h, dct4x4[1+i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
  33. + x264_quant_4x4_qns( h, dct4x4[1+i], i_qscale, i, 1, DCT_LUMA_AC );
  34. + }
  35. else
  36. h->quantf.quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
  37.  
  38. @@ -425,7 +434,10 @@
  39. if( h->mb.b_noise_reduction )
  40. x264_denoise_dct( h, (int16_t*)dct8x8[idx] );
  41. if( h->mb.b_trellis )
  42. + {
  43. x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
  44. + x264_quant_8x8_qns( h, dct8x8[idx], i_qp, idx, 0 );
  45. + }
  46. else
  47. h->quantf.quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
  48.  
  49. @@ -477,7 +489,10 @@
  50. if( h->mb.b_noise_reduction )
  51. x264_denoise_dct( h, (int16_t*)dct4x4[idx] );
  52. if( h->mb.b_trellis )
  53. + {
  54. x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
  55. + x264_quant_4x4_qns( h, dct4x4[idx], i_qp, idx, 0, DCT_LUMA_4x4 );
  56. + }
  57. else
  58. h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
  59.  
  60. Index: encoder/rdo.c
  61. ===================================================================
  62. --- encoder/rdo.c (revision 736)
  63. +++ encoder/rdo.c (working copy)
  64. @@ -503,7 +503,183 @@
  65. }
  66. }
  67.  
  68. +inline int variance( uint8_t *pix, int x, int y, int start )
  69. +{
  70. + uint8_t *p_src = pix + (x + start) + FENC_STRIDE * (y + start);
  71. + int sum = 0; int ssd = 0; int i,j;
  72. + for(i = 0; i < 3; i++)
  73. + for(j = 0; j < 3; j++)
  74. + {
  75. + int cur = p_src[j+i*FENC_STRIDE];
  76. + sum += cur; ssd += cur*cur;
  77. + }
  78. + return (9*ssd - (sum * sum));
  79. +}
  80.  
  81. +uint64_t weighted_error8( x264_t *h, int16_t dct[8][8], uint16_t inv_variance[8][8], int i_qp, int idx, int i_quant_cat)
  82. +{
  83. + uint8_t idct[8*FDEC_STRIDE];
  84. + int16_t ndct[8][8];
  85. + int i,j;
  86. + uint8_t *decpix = h->mb.pic.p_fdec[0]+(8*(idx&1))+FDEC_STRIDE*(((idx>>1)&1)<<3);
  87. + h->mc.copy[PIXEL_8x8]( idct, FDEC_STRIDE, decpix, FDEC_STRIDE, 8 );
  88. + memcpy( ndct, dct, sizeof(ndct) );
  89. + h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct );
  90. + h->quantf.dequant_8x8( ndct, h->dequant8_mf[CQM_8PY], i_qp );
  91. + h->dctf.add8x8_idct8( idct, ndct );
  92. + uint64_t error = 0;
  93. + uint8_t *pix = h->mb.pic.p_fenc[0]+(8*(idx&1))+FENC_STRIDE*(((idx>>1)&1)<<3);
  94. + for( i = 0; i < 8; i++ )
  95. + for( j = 0; j < 8; j++ )
  96. + {
  97. + int pix_error = idct[i+j*FDEC_STRIDE] - pix[i+j*FENC_STRIDE];
  98. + error += (pix_error*pix_error)*inv_variance[j][i];
  99. + }
  100. + x264_cabac_t cabac_tmp = h->cabac;
  101. + block_residual_write_cabac(h,&cabac_tmp,DCT_LUMA_8x8,idx,h->dct.luma8x8[idx],64);
  102. + error *= 38;
  103. + uint64_t bits = cabac_tmp.f8_bits_encoded;
  104. + bits *= lambda2_tab[i_quant_cat][i_qp];
  105. + error += (bits + 8) >> 4;
  106. + return error;
  107. +}
  108. +
  109. +uint64_t weighted_error4( x264_t *h, int16_t dct[4][4], uint16_t inv_variance[4][4], int i_qp, int idx, int i_quant_cat, int i_ctxBlockCat )
  110. +{
  111. + uint8_t idct[4*FDEC_STRIDE];
  112. + int16_t ndct[4][4];
  113. + int i,j;
  114. + uint8_t *decpix = h->mb.pic.p_fdec[0] + 4*block_idx_x[idx] + 4*FDEC_STRIDE*block_idx_y[idx];
  115. + h->mc.copy[PIXEL_4x4]( idct, FDEC_STRIDE, decpix, FDEC_STRIDE, 4 );
  116. + memcpy( ndct, dct, sizeof(ndct) );
  117. + h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct );
  118. + h->quantf.dequant_4x4( ndct, h->dequant4_mf[CQM_4PY], i_qp );
  119. + if( i_ctxBlockCat == DCT_LUMA_AC )
  120. + ndct[0][0] = dct[0][0];
  121. + h->dctf.add4x4_idct( idct, ndct );
  122. + uint64_t error = 0;
  123. + uint8_t *pix = h->mb.pic.p_fenc[0] + 4*block_idx_x[idx] + 4*FENC_STRIDE*block_idx_y[idx];
  124. + for( i = 0; i < 4; i++ )
  125. + for( j = 0; j < 4; j++ )
  126. + {
  127. + int pix_error = idct[i+j*FDEC_STRIDE] - pix[i+j*FENC_STRIDE];
  128. + error += ((pix_error*pix_error)*inv_variance[j][i]);
  129. + }
  130. + x264_cabac_t cabac_tmp = h->cabac;
  131. + block_residual_write_cabac(h,&cabac_tmp,i_ctxBlockCat,idx,h->dct.block[idx].luma4x4,16 - (i_ctxBlockCat == DCT_LUMA_AC));
  132. + error *= 38;
  133. + uint64_t bits = cabac_tmp.f8_bits_encoded;
  134. + bits *= lambda2_tab[i_quant_cat][i_qp];
  135. + error += (bits + 8) >> 4;
  136. + return error;
  137. +}
  138. +
  139. +void x264_quant_8x8_qns( x264_t *h, int16_t dct[8][8], int i_qp, int idx, int i_quant_cat )
  140. +{
  141. + int x,y,start;
  142. + uint16_t inv_variance[8][8];
  143. + int size = 8;
  144. + uint8_t *pix = h->mb.pic.p_fenc[0]+(8*(idx&1))+FENC_STRIDE*(((idx>>1)&1)<<3);
  145. + for(x = 0; x < size; x++)
  146. + for(y = 0; y < size; y++)
  147. + {
  148. + if(x == 0 || y == 0) start = 0;
  149. + else if(x == size - 1 || y == size - 1) start = -2;
  150. + else start = -1;
  151. + inv_variance[y][x] = 256 + 65536 / (variance(pix,x,y,start) + 180);
  152. + inv_variance[y][x] = 800 - inv_variance[y][x];
  153. + }
  154. + int i;
  155. + int runs = 0;
  156. + int changed[64];
  157. + for( ; runs < 1000; runs++)
  158. + {
  159. + int best_change_i = 0;
  160. + int64_t best_change = 0;
  161. + int best_change_amount = 0;
  162. + uint64_t orig_error = weighted_error8(h,dct,inv_variance,i_qp,idx,i_quant_cat);
  163. + for( i = 0; i < 64; i++ )
  164. + {
  165. + // if(changed[i]) continue;
  166. + int change = -1;
  167. + for(; change <= 1; change += 2)
  168. + {
  169. + dct[0][i] += change;
  170. + uint64_t new_error = weighted_error8(h,dct,inv_variance,i_qp,idx,i_quant_cat);
  171. + int64_t cur_change = orig_error - new_error;
  172. + if(cur_change > best_change)
  173. + {
  174. + dct[0][i] -= change;
  175. + best_change = cur_change;
  176. + best_change_i = i;
  177. + best_change_amount = change;
  178. + break;
  179. + }
  180. + dct[0][i] -= change;
  181. + }
  182. + }
  183. + if(best_change == 0) break;
  184. + else
  185. + {
  186. + changed[best_change_i] = 1;
  187. + dct[0][best_change_i] += best_change_amount;
  188. + }
  189. + }
  190. +}
  191. +
  192. +void x264_quant_4x4_qns( x264_t *h, int16_t dct[4][4], int i_qp, int idx, int i_quant_cat, int i_ctxBlockCat )
  193. +{
  194. + int x,y,start;
  195. + uint16_t inv_variance[4][4];
  196. + int size = 4;
  197. + uint8_t *pix = h->mb.pic.p_fenc[0] + 4*block_idx_x[idx] + 4*FENC_STRIDE*block_idx_y[idx];
  198. + for(x = 0; x < size; x++)
  199. + for(y = 0; y < size; y++)
  200. + {
  201. + if(x == 0 || y == 0) start = 0;
  202. + else if(x == size - 1 || y == size - 1) start = -2;
  203. + else start = -1;
  204. + inv_variance[y][x] = 256 + 65536 / (variance(pix,x,y,start) + 180);
  205. + inv_variance[y][x] = 800 - inv_variance[y][x];
  206. + }
  207. + int i;
  208. + int runs = 0;
  209. + int changed[16];
  210. + for( ; runs < 1000; runs++)
  211. + {
  212. + int best_change_i = 0;
  213. + int64_t best_change = 0;
  214. + int best_change_amount = 0;
  215. + uint64_t orig_error = weighted_error4(h,dct,inv_variance,i_qp,idx,i_quant_cat,i_ctxBlockCat);
  216. + for( i = (i_ctxBlockCat == DCT_LUMA_AC); i < 16; i++ )
  217. + {
  218. + //if(changed[i]) continue;
  219. + int change = -1;
  220. + for(; change <= 1; change += 2)
  221. + {
  222. + dct[0][i] += change;
  223. + uint64_t new_error = weighted_error4(h,dct,inv_variance,i_qp,idx,i_quant_cat,i_ctxBlockCat);
  224. + int64_t cur_change = orig_error - new_error;
  225. + if(cur_change > best_change)
  226. + {
  227. + dct[0][i] -= change;
  228. + best_change = cur_change;
  229. + best_change_i = i;
  230. + best_change_amount = change;
  231. + break;
  232. + }
  233. + dct[0][i] -= change;
  234. + }
  235. + }
  236. + if(best_change == 0) break;
  237. + else
  238. + {
  239. + changed[best_change_i] = 1;
  240. + dct[0][best_change_i] += best_change_amount;
  241. + }
  242. + }
  243. +}
  244. +
  245. void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
  246. int i_qp, int i_ctxBlockCat, int b_intra )
  247. {
  248.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement