Guest User

Untitled

a guest
Mar 18th, 2018
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 14.35 KB | None | 0 0
  1. void InvertHorizontalRow16s8sTo16sBuffered(PIXEL *lowpass,              // Row of horizontal lowpass coefficients
  2.                                            int lowpass_quantization,    // lowpass quantization factor
  3.                                            PIXEL8S *highpass_data,      // Row of horizontal highpass coefficients
  4.                                            int highpass_quantization,   // highpass quantization factor
  5.                                            PIXEL *output,               // Row of reconstructed results
  6.                                            int width,                   // Length of each row of horizontal coefficients
  7.                                            PIXEL *buffer)               // Buffer to hold the dequantized values
  8. {
  9. #if _DECODE_LOWPASS_16S
  10.     const int column_step = 8;
  11.     const int last_column = width - 1;
  12.     int post_column = last_column - (last_column % column_step);
  13.     int column;
  14.  
  15.     PIXEL *highline = buffer;
  16.  
  17.     __m128i low1_epi16;     // Lowpass coefficients
  18.     __m128i low2_epi16;
  19.     __m128i high1_epi16;    // Current eight highpass coefficients
  20.     __m128i high2_epi16;    // Next eight highpass coefficients
  21.  
  22.     __m128i half_epi16 = _mm_set1_epi16(4);
  23.  
  24. #if _UNALIGNED
  25.     // The fast loop computes output points starting at the third column
  26.     __m128i *outptr = (__m128i *)&output[2];
  27. #else
  28.     // The fast loop merges values from different phases to allow aligned stores
  29.     __m128i *outptr = (__m128i *)&output[0];
  30.  
  31.     // Two 16-bit coefficients from the previous loop iteration
  32.     //short remainder[2];
  33. #endif
  34.  
  35.     PIXEL *colptr;
  36.  
  37.     int32_t even;
  38.     int32_t odd;
  39.     //int32_t lsb;
  40.  
  41.     PIXEL *highpass = (PIXEL *)highpass_data;
  42.  
  43.     // Adjust the end of the fast loop if necessary
  44.     if (post_column == last_column)
  45.         post_column -= column_step;
  46.  
  47.     // Undo quantization for the highpass row
  48. #if _DEQUANTIZE_IN_FSM
  49.     highline = highpass;
  50. #else
  51.     DequantizeBandRow16s(highpass, width, highpass_quantization, highline);
  52. #endif
  53. #if (0)
  54.     int x;
  55.     printf("highline \n");
  56.     for (x = 0; x < width/2; x++){
  57.         printf("%d ,", highline[x]);
  58.     }
  59.     printf("\n");
  60.  
  61. #endif
  62.     // Start processing at the beginning of the row
  63.     column = 0;
  64.  
  65.     // Process the first two output points with special filters for the left border
  66.     even = 0;
  67.     odd = 0;
  68.  
  69.     // Apply the even reconstruction filter to the lowpass band
  70.     even += 11 * lowpass[column + 0];
  71.     even -=  4 * lowpass[column + 1];
  72.     even +=  1 * lowpass[column + 2];
  73.     even += 4;
  74.     even >>= 3;
  75.  
  76.     // Add the highpass correction
  77.     even += highline[column];
  78.     even = DivideByShift(even, 1);
  79.  
  80. #if _UNALIGNED
  81.     printf("hello amigo\n");
  82.     // Place the even result in the even column
  83.     output[0] = SATURATE(even);
  84. #else
  85.     // The output value will be stored later
  86.     //remainder[0] = SATURATE(even);
  87. #endif
  88.  
  89.     // Apply the odd reconstruction filter to the lowpass band
  90.     odd += 5 * lowpass[column + 0];
  91.     odd += 4 * lowpass[column + 1];
  92.     odd -= 1 * lowpass[column + 2];
  93.     odd += 4;
  94.     odd >>= 3;
  95.  
  96.     // Subtract the highpass correction
  97.     odd -= highline[column];
  98.     odd = DivideByShift(odd, 1);
  99.  
  100. #if _UNALIGNED
  101.     // Place the odd result in the odd column
  102.     output[1] = SATURATE(odd);
  103. #else
  104.     // The output value will be stored later
  105.     //remainder[1] = SATURATE(odd);
  106. #endif
  107.  
  108. /*
  109.     colptr = &output[2];
  110.     for (column = 2; column < 8; column++)
  111.     {
  112.         int32_t even = 0;       // Result of convolution with even filter
  113.         int32_t odd = 0;        // Result of convolution with odd filter
  114.  
  115.         // Apply the even reconstruction filter to the lowpass band
  116.         even += lowpass[column - 1];
  117.         even -= lowpass[column + 1];
  118.         even += ROUNDING(even,8);
  119.         even = DivideByShift(even, 3);
  120.         even += lowpass[column + 0];
  121.  
  122.         // Add the highpass correction
  123.         even += highline[column];
  124.         even = DivideByShift(even, 1);
  125.  
  126.         // Place the even result in the even column
  127.         *(colptr++) = SATURATE(even);
  128.  
  129.         // Apply the odd reconstruction filter to the lowpass band
  130.         odd -= lowpass[column - 1];
  131.         odd += lowpass[column + 1];
  132.         odd += ROUNDING(odd,8);
  133.         odd = DivideByShift(odd, 3);
  134.         odd += lowpass[column + 0];
  135.  
  136.         // Subtract the highpass correction
  137.         odd -= highline[column];
  138.         odd = DivideByShift(odd, 1);
  139.  
  140.         // Place the odd result in the odd column
  141.         *(colptr++) = SATURATE(odd);
  142.     }
  143. */
  144.  
  145. #if (0 && _FASTLOOP && XMMOPT)
  146.     printf("XMMOPT");
  147.     // Preload the first eight lowpass coefficients
  148.     low1_epi16 = _mm_load_si128((__m128i *)&lowpass[column]);
  149. //  low1_epi16 = _mm_adds_epi16(low1_epi16, overflowprotect_epi16);
  150.  
  151.     // Preload the first eight highpass coefficients
  152.     high1_epi16 = _mm_load_si128((__m128i *)&highline[column]);
  153.  
  154.     // The reconstruction filters use pixels starting at the first column
  155.     for (; column < post_column; column += column_step)
  156.     {
  157.         __m128i even_epi16;     // Result of convolution with even filter
  158.         __m128i odd_epi16;      // Result of convolution with odd filter
  159.         __m128i temp_epi16;
  160.         __m128i out_epi16;      // Reconstructed data
  161.         //__m128i high_epi16;
  162.         uint32_t temp;      // Temporary register for last two values
  163.  
  164.  
  165.         // Preload the next eight lowpass coefficients
  166.         low2_epi16 = _mm_load_si128((__m128i *)&lowpass[column+8]);
  167.  
  168. //      low2_epi16 = _mm_adds_epi16(low2_epi16, overflowprotect_epi16);
  169.  
  170.         // Compute the first two even and two odd output points //
  171.  
  172.         // Apply the even reconstruction filter to the lowpass band
  173. /*      even_epi16 = low1_epi16;
  174.         temp_epi16 = _mm_slli_epi16(low1_epi16, 3);
  175.         temp_epi16 = _mm_srli_si128(temp_epi16, 1*2);
  176.         even_epi16 = _mm_adds_epi16(even_epi16, temp_epi16);
  177.         temp_epi16 = _mm_srli_si128(low1_epi16, 2*2);
  178.         even_epi16 = _mm_subs_epi16(even_epi16, temp_epi16);
  179.  
  180. #if 1
  181.         // Apply the rounding adjustment
  182.         even_epi16 = _mm_adds_epi16(even_epi16, _mm_set1_epi16(4));
  183. #endif
  184.         // Divide by eight
  185.         even_epi16 = _mm_srai_epi16(even_epi16, 3);
  186. */
  187.         // better math.
  188.         even_epi16 = low1_epi16;
  189.         temp_epi16 = _mm_srli_si128(even_epi16, 2*2);
  190.         even_epi16 = _mm_subs_epi16(even_epi16, temp_epi16);
  191.         even_epi16 = _mm_adds_epi16(even_epi16, half_epi16);
  192.         even_epi16 = _mm_srai_epi16(even_epi16, 3);
  193.         temp_epi16 = _mm_srli_si128(low1_epi16, 1*2);
  194.         even_epi16 = _mm_adds_epi16(even_epi16, temp_epi16);
  195.  
  196.         // Shift the highpass correction by one column
  197.         high1_epi16 = _mm_srli_si128(high1_epi16, 1*2);
  198.  
  199.         // Prescale for 8bit output - DAN 4/5/02
  200.         //high_epi16 = _mm_slli_epi16(high1_epi16, prescale);
  201.  
  202.         // Add the highpass correction and divide by two
  203.         even_epi16 = _mm_adds_epi16(even_epi16, high1_epi16);
  204.         even_epi16 = _mm_srai_epi16(even_epi16, 1);
  205.  
  206.         // Apply the odd reconstruction filter to the lowpass band
  207. /*      odd_epi16 = _mm_slli_epi16(low1_epi16, 3);
  208.         odd_epi16 = _mm_srli_si128(odd_epi16, 1*2);
  209.         temp_epi16 = _mm_srli_si128(low1_epi16, 2*2);
  210.         odd_epi16 = _mm_adds_epi16(odd_epi16, temp_epi16);
  211.         odd_epi16 = _mm_subs_epi16(odd_epi16, low1_epi16);
  212.  
  213. #if 1
  214.         // Apply the rounding adjustment
  215.         odd_epi16 = _mm_adds_epi16(odd_epi16, _mm_set1_epi16(4));
  216. #endif
  217.         // Divide by eight
  218.         odd_epi16 = _mm_srai_epi16(odd_epi16, 3);
  219. */
  220.         // Apply the odd reconstruction filter to the lowpass band
  221.         // better math.
  222.         odd_epi16 = _mm_srli_si128(low1_epi16, 2*2);
  223.         temp_epi16 = low1_epi16;
  224.         odd_epi16 = _mm_subs_epi16(odd_epi16, temp_epi16);
  225.         odd_epi16 = _mm_adds_epi16(odd_epi16, half_epi16);
  226.         odd_epi16 = _mm_srai_epi16(odd_epi16, 3);
  227.         temp_epi16 = _mm_srli_si128(low1_epi16, 1*2);
  228.         odd_epi16 = _mm_adds_epi16(odd_epi16, temp_epi16);
  229.  
  230.         // Subtract the highpass correction and divide by two
  231.         odd_epi16 = _mm_subs_epi16(odd_epi16, high1_epi16);
  232.         odd_epi16 = _mm_srai_epi16(odd_epi16, 1);
  233.  
  234.         // Interleave the first four even and odd results
  235.         out_epi16 = _mm_unpacklo_epi16(even_epi16, odd_epi16);
  236.         //out_epi16 = _mm_max_epi16(out_epi16, _mm_setzero_si64());
  237.  
  238. #if _UNALIGNED
  239.         // Store the first eight output values
  240.         _mm_storeu_si128(outptr++, out_epi16);
  241. #else
  242.         // Combine the new output values with the two values from the previous phase
  243.         out_epi16 = _mm_shuffle_epi32(out_epi16, _MM_SHUFFLE(2, 1, 0, 3));
  244.         temp = _mm_cvtsi128_si32(out_epi16);
  245.         out_epi16 = _mm_insert_epi16(out_epi16, even, 0);
  246.         out_epi16 = _mm_insert_epi16(out_epi16, odd, 1);
  247.  
  248.         // Store eight output values
  249.         _mm_store_si128(outptr++, out_epi16);
  250.  
  251.         // Save the remaining two output values
  252.         //*((int *)remainder) = temp;
  253.         even = (short)temp;
  254.         odd = (short)(temp >> 16);
  255. #endif
  256.  
  257.         // Compute the second four even and four odd output points //
  258.  
  259.         // Preload the highpass correction
  260.         high2_epi16 = _mm_load_si128((__m128i *)&highline[column+8]);
  261.  
  262.         // Shift in the new pixels for the next stage of the loop
  263.         low1_epi16 = _mm_srli_si128(low1_epi16, 4*2);
  264.         temp_epi16 = _mm_slli_si128(low2_epi16, 4*2);
  265.         low1_epi16 = _mm_or_si128(low1_epi16, temp_epi16);
  266.  
  267. /*      // Apply the even reconstruction filter to the lowpass band
  268.         even_epi16 = low1_epi16;
  269.         temp_epi16 = _mm_slli_epi16(low1_epi16, 3);
  270.         temp_epi16 = _mm_srli_si128(temp_epi16, 1*2);
  271.         even_epi16 = _mm_adds_epi16(even_epi16, temp_epi16);
  272.         temp_epi16 = _mm_srli_si128(low1_epi16, 2*2);
  273.         even_epi16 = _mm_subs_epi16(even_epi16, temp_epi16);
  274.  
  275. #if 1
  276.         // Apply the rounding adjustment
  277.         even_epi16 = _mm_adds_epi16(even_epi16, _mm_set1_epi16(4));
  278. #endif
  279.         // Divide by eight
  280.         even_epi16 = _mm_srai_epi16(even_epi16, 3);
  281. */
  282.         // better math.
  283.         even_epi16 = low1_epi16;
  284.         temp_epi16 = _mm_srli_si128(even_epi16, 2*2);
  285.         even_epi16 = _mm_subs_epi16(even_epi16, temp_epi16);
  286.         even_epi16 = _mm_adds_epi16(even_epi16, half_epi16);
  287.         even_epi16 = _mm_srai_epi16(even_epi16, 3);
  288.         temp_epi16 = _mm_srli_si128(low1_epi16, 1*2);
  289.         even_epi16 = _mm_adds_epi16(even_epi16, temp_epi16);
  290.  
  291.  
  292.         // Shift in the next four highpass coefficients
  293.         high1_epi16 = _mm_srli_si128(high1_epi16, 4*2);
  294.         temp_epi16 = _mm_slli_si128(high2_epi16, 3*2);
  295.         high1_epi16 = _mm_or_si128(high1_epi16, temp_epi16);
  296.  
  297.         // Prescale for 8bit output - DAN 4/5/02
  298.         //high_epi16 = _mm_slli_epi16(high1_epi16, prescale);
  299.  
  300.         // Add the highpass correction and divide by two
  301.         even_epi16 = _mm_adds_epi16(even_epi16, high1_epi16);
  302.         even_epi16 = _mm_srai_epi16(even_epi16, 1);
  303.  
  304.         // Apply the odd reconstruction filter to the lowpass band
  305. /*      odd_epi16 = _mm_slli_epi16(low1_epi16, 3);
  306.         odd_epi16 = _mm_srli_si128(odd_epi16, 1*2);
  307.         temp_epi16 = _mm_srli_si128(low1_epi16, 2*2);
  308.         odd_epi16 = _mm_adds_epi16(odd_epi16, temp_epi16);
  309.         odd_epi16 = _mm_subs_epi16(odd_epi16, low1_epi16);
  310.  
  311. #if 1
  312.         // Apply the rounding adjustment
  313.         odd_epi16 = _mm_adds_epi16(odd_epi16, _mm_set1_epi16(4));
  314. #endif
  315.         // Divide by eight
  316.         odd_epi16 = _mm_srai_epi16(odd_epi16, 3);
  317. */
  318.  
  319.         // Apply the odd reconstruction filter to the lowpass band
  320.         // better math.
  321.         odd_epi16 = _mm_srli_si128(low1_epi16, 2*2);
  322.         temp_epi16 = low1_epi16;
  323.         odd_epi16 = _mm_subs_epi16(odd_epi16, temp_epi16);
  324.         odd_epi16 = _mm_adds_epi16(odd_epi16, half_epi16);
  325.         odd_epi16 = _mm_srai_epi16(odd_epi16, 3);
  326.         temp_epi16 = _mm_srli_si128(low1_epi16, 1*2);
  327.  
  328.         odd_epi16 = _mm_adds_epi16(odd_epi16, temp_epi16);
  329.         // Subtract the highpass correction and divide by two
  330.         odd_epi16 = _mm_subs_epi16(odd_epi16, high1_epi16);
  331.         odd_epi16 = _mm_srai_epi16(odd_epi16, 1);
  332.  
  333.         // Interleave the second four even and odd results
  334.         out_epi16 = _mm_unpacklo_epi16(even_epi16, odd_epi16);
  335.         //out_epi16 = _mm_max_epi16(out_epi16, _mm_setzero_si64());
  336.  
  337. #if _UNALIGNED
  338.         // Store the first eight output values
  339.         _mm_storeu_si128(outptr++, out_epi16);
  340. #else
  341.         // Combine the new output values with the two values from the previous phase
  342.         out_epi16 = _mm_shuffle_epi32(out_epi16, _MM_SHUFFLE(2, 1, 0, 3));
  343.         temp = _mm_cvtsi128_si32(out_epi16);
  344.         out_epi16 = _mm_insert_epi16(out_epi16, even, 0);
  345.         out_epi16 = _mm_insert_epi16(out_epi16, odd, 1);
  346.  
  347.         // Store eight output values
  348.         _mm_store_si128(outptr++, out_epi16);
  349.  
  350.         // Save the remaining two output values
  351.         even = (short)temp;
  352.         odd = (short)(temp >> 16);
  353. #endif
  354.  
  355.         // Prepare for the next loop iteration //
  356.  
  357.         // The second eight lowpass coefficients will be the current values
  358.         low1_epi16 = low2_epi16;
  359.  
  360.         // The second eight highpass coefficients will be the current values
  361.         high1_epi16 = high2_epi16;
  362.     }
  363.  
  364.     // Should have exited the loop with the column equal to the post processing column
  365.     assert(column == post_column);
  366.  
  367. #endif
  368.  
  369.     // The fast processing loop is one column behind the actual column
  370.     column++;
  371.  
  372.     // Get the pointer to the next output value
  373.     colptr = (PIXEL *)outptr;
  374.  
  375. #if _UNALIGNED
  376.     // The last two output points have already been stored
  377. #else
  378.     // Store the last two output points produced by the loop
  379.     *(colptr++) = SATURATE(even);
  380.     *(colptr++) = SATURATE(odd);
  381. #endif
  382.     // Process the rest of the columns up to the last column in the row
  383.     for (; column < last_column; column++)
  384.     {
  385.         int32_t even = 0;       // Result of convolution with even filter
  386.         int32_t odd = 0;        // Result of convolution with odd filter
  387.  
  388.         // Apply the even reconstruction filter to the lowpass band
  389.         even = lowpass[column - 1];
  390.         even -= lowpass[column + 1];
  391.         even += 4; //DAN20050921
  392.         even >>= 3;
  393.         even += lowpass[column + 0];
  394.  
  395.         // Add the highpass correction
  396.         even += highpass[column];
  397.         even = DivideByShift(even, 1);
  398.  
  399.         // Place the even result in the even column
  400.         //even >>= _INVERSE_TEMPORAL_PRESCALE;
  401.         *(colptr++) = SATURATE(even);
  402.  
  403.         // Apply the odd reconstruction filter to the lowpass band
  404.         odd = -lowpass[column - 1];
  405.         odd += lowpass[column + 1];
  406.         odd += 4; //DAN20050921
  407.         odd >>= 3;
  408.         odd += lowpass[column + 0];
  409.  
  410.         // Subtract the highpass correction
  411.         odd -= highpass[column];
  412.         odd = DivideByShift(odd, 1);
  413.  
  414.  
  415.         // Place the odd result in the odd column
  416.         //odd >>= _INVERSE_TEMPORAL_PRESCALE;
  417.         *(colptr++) = SATURATE(odd);
  418.     }
  419.  
  420.     // Should have exited the loop at the column for right border processing
  421.     assert(column == last_column);
  422.  
  423.     // Process the last two output points with special filters for the right border
  424.     even = 0;
  425.     odd = 0;
  426.  
  427.     // Apply the even reconstruction filter to the lowpass band
  428.     even += 5 * lowpass[column + 0];
  429.     even += 4 * lowpass[column - 1];
  430.     even -= 1 * lowpass[column - 2];
  431.     even += 4;
  432.     even >>= 3;
  433.  
  434.     // Add the highpass correction
  435.     even += highline[column];
  436.     even = DivideByShift(even, 1);
  437.  
  438.     // Place the even result in the even column
  439.     *(colptr++) = SATURATE(even);
  440.  
  441.     // Apply the odd reconstruction filter to the lowpass band
  442.     odd += 11 * lowpass[column + 0];
  443.     odd -=  4 * lowpass[column - 1];
  444.     odd +=  1 * lowpass[column - 2];
  445.     odd += 4;
  446.     odd >>= 3;
  447.  
  448.     // Subtract the highpass correction
  449.     odd -= highline[column];
  450.     odd = DivideByShift(odd, 1);
  451.  
  452.     // Place the odd result in the odd column
  453.     *(colptr++) = SATURATE(odd);
  454.  
  455. #else
  456. #error Have not implemented 8-bit lowpass coefficients
  457. #endif
  458. }
  459.  
  460. #endif
Add Comment
Please, Sign In to add comment