Advertisement
Guest User

CMSIS arm_biquad_cascade_df1_q31

a guest
Oct 16th, 2014
311
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 12.84 KB | None | 0 0
  1. /* ----------------------------------------------------------------------    
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
  3. *    
  4. * $Date:        12. March 2014
  5. * $Revision:    V1.4.4
  6. *    
  7. * Project:      CMSIS DSP Library    
  8. * Title:        arm_biquad_cascade_df1_q31.c    
  9. *    
  10. * Description:  Processing function for the    
  11. *               Q31 Biquad cascade filter    
  12. *    
  13. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14. *  
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. *   - Redistributions of source code must retain the above copyright
  19. *     notice, this list of conditions and the following disclaimer.
  20. *   - Redistributions in binary form must reproduce the above copyright
  21. *     notice, this list of conditions and the following disclaimer in
  22. *     the documentation and/or other materials provided with the
  23. *     distribution.
  24. *   - Neither the name of ARM LIMITED nor the names of its contributors
  25. *     may be used to endorse or promote products derived from this
  26. *     software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39. * POSSIBILITY OF SUCH DAMAGE.    
  40. * -------------------------------------------------------------------- */
  41.  
  42. #include "arm_math.h"
  43.  
  44. /**    
  45.  * @ingroup groupFilters    
  46.  */
  47.  
  48. /**    
  49.  * @addtogroup BiquadCascadeDF1    
  50.  * @{    
  51.  */
  52.  
  53. /**    
  54.  * @brief Processing function for the Q31 Biquad cascade filter.    
  55.  * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.    
  56.  * @param[in]  *pSrc      points to the block of input data.    
  57.  * @param[out] *pDst      points to the block of output data.    
  58.  * @param[in]  blockSize  number of samples to process per call.    
  59.  * @return none.    
  60.  *    
  61.  * <b>Scaling and Overflow Behavior:</b>    
  62.  * \par    
  63.  * The function is implemented using an internal 64-bit accumulator.    
  64.  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.    
  65.  * Thus, if the accumulator result overflows it wraps around rather than clip.    
  66.  * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).    
  67.  * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to    
  68.  * 1.31 format by discarding the low 32 bits.    
  69.  *    
  70.  * \par    
  71.  * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.    
  72.  */
  73.  
  74. void arm_biquad_cascade_df1_q31(
  75.   const arm_biquad_casd_df1_inst_q31 * S,
  76.   q31_t * pSrc,
  77.   q31_t * pDst,
  78.   uint32_t blockSize)
  79. {
  80.   q63_t acc;                                     /*  accumulator                   */
  81.   uint32_t uShift = ((uint32_t) S->postShift + 1u);
  82.   uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
  83.   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
  84.   q31_t *pOut = pDst;                            /*  output pointer initialization */
  85.   q31_t *pState = S->pState;                     /*  pState pointer initialization */
  86.   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
  87.   q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
  88.   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
  89.   q31_t Xn;                                      /*  temporary input               */
  90.   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
  91.  
  92.  
  93. #ifndef ARM_MATH_CM0_FAMILY_FAMILY
  94.  
  95.   q31_t acc_l, acc_h;                            /*  temporary output variables    */
  96.  
  97.   /* Run the below code for Cortex-M4 and Cortex-M3 */
  98.  
  99.   do
  100.   {
  101.     /* Reading the coefficients */
  102.     b0 = *pCoeffs++;
  103.     b1 = *pCoeffs++;
  104.     b2 = *pCoeffs++;
  105.     a1 = *pCoeffs++;
  106.     a2 = *pCoeffs++;
  107.  
  108.     /* Reading the state values */
  109.     Xn1 = pState[0];
  110.     Xn2 = pState[1];
  111.     Yn1 = pState[2];
  112.     Yn2 = pState[3];
  113.  
  114.     /* Apply loop unrolling and compute 4 output values simultaneously. */
  115.     /*      The variable acc hold output values that are being computed:    
  116.      *    
  117.      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    
  118.      */
  119.  
  120.     sample = blockSize >> 2u;
  121.  
  122.     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
  123.      ** a second loop below computes the remaining 1 to 3 samples. */
  124.     while(sample > 0u)
  125.     {
  126.       /* Read the input */
  127.       Xn = *pIn++;
  128.  
  129.       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  130.  
  131.       /* acc =  b0 * x[n] */
  132.       acc = (q63_t) b0 *Xn;
  133.       /* acc +=  b1 * x[n-1] */
  134.       acc += (q63_t) b1 *Xn1;
  135.       /* acc +=  b[2] * x[n-2] */
  136.       acc += (q63_t) b2 *Xn2;
  137.       /* acc +=  a1 * y[n-1] */
  138.       acc += (q63_t) a1 *Yn1;
  139.       /* acc +=  a2 * y[n-2] */
  140.       acc += (q63_t) a2 *Yn2;
  141.  
  142.       /* The result is converted to 1.31 , Yn2 variable is reused */
  143.  
  144.       /* Calc lower part of acc */
  145.       acc_l = acc & 0xffffffff;
  146.  
  147.       /* Calc upper part of acc */
  148.       acc_h = (acc >> 32) & 0xffffffff;
  149.  
  150.       /* Apply shift for lower part of acc and upper part of acc */
  151.       Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  152.  
  153.       /* Store the output in the destination buffer. */
  154.       *pOut++ = Yn2;
  155.  
  156.       /* Read the second input */
  157.       Xn2 = *pIn++;
  158.  
  159.       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  160.  
  161.       /* acc =  b0 * x[n] */
  162.       acc = (q63_t) b0 *Xn2;
  163.       /* acc +=  b1 * x[n-1] */
  164.       acc += (q63_t) b1 *Xn;
  165.       /* acc +=  b[2] * x[n-2] */
  166.       acc += (q63_t) b2 *Xn1;
  167.       /* acc +=  a1 * y[n-1] */
  168.       acc += (q63_t) a1 *Yn2;
  169.       /* acc +=  a2 * y[n-2] */
  170.       acc += (q63_t) a2 *Yn1;
  171.  
  172.  
  173.       /* The result is converted to 1.31, Yn1 variable is reused  */
  174.  
  175.       /* Calc lower part of acc */
  176.       acc_l = acc & 0xffffffff;
  177.  
  178.       /* Calc upper part of acc */
  179.       acc_h = (acc >> 32) & 0xffffffff;
  180.  
  181.  
  182.       /* Apply shift for lower part of acc and upper part of acc */
  183.       Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  184.  
  185.       /* Store the output in the destination buffer. */
  186.       *pOut++ = Yn1;
  187.  
  188.       /* Read the third input  */
  189.       Xn1 = *pIn++;
  190.  
  191.       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  192.  
  193.       /* acc =  b0 * x[n] */
  194.       acc = (q63_t) b0 *Xn1;
  195.       /* acc +=  b1 * x[n-1] */
  196.       acc += (q63_t) b1 *Xn2;
  197.       /* acc +=  b[2] * x[n-2] */
  198.       acc += (q63_t) b2 *Xn;
  199.       /* acc +=  a1 * y[n-1] */
  200.       acc += (q63_t) a1 *Yn1;
  201.       /* acc +=  a2 * y[n-2] */
  202.       acc += (q63_t) a2 *Yn2;
  203.  
  204.       /* The result is converted to 1.31, Yn2 variable is reused  */
  205.       /* Calc lower part of acc */
  206.       acc_l = acc & 0xffffffff;
  207.  
  208.       /* Calc upper part of acc */
  209.       acc_h = (acc >> 32) & 0xffffffff;
  210.  
  211.  
  212.       /* Apply shift for lower part of acc and upper part of acc */
  213.       Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  214.  
  215.       /* Store the output in the destination buffer. */
  216.       *pOut++ = Yn2;
  217.  
  218.       /* Read the forth input */
  219.       Xn = *pIn++;
  220.  
  221.       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  222.  
  223.       /* acc =  b0 * x[n] */
  224.       acc = (q63_t) b0 *Xn;
  225.       /* acc +=  b1 * x[n-1] */
  226.       acc += (q63_t) b1 *Xn1;
  227.       /* acc +=  b[2] * x[n-2] */
  228.       acc += (q63_t) b2 *Xn2;
  229.       /* acc +=  a1 * y[n-1] */
  230.       acc += (q63_t) a1 *Yn2;
  231.       /* acc +=  a2 * y[n-2] */
  232.       acc += (q63_t) a2 *Yn1;
  233.  
  234.       /* The result is converted to 1.31, Yn1 variable is reused  */
  235.       /* Calc lower part of acc */
  236.       acc_l = acc & 0xffffffff;
  237.  
  238.       /* Calc upper part of acc */
  239.       acc_h = (acc >> 32) & 0xffffffff;
  240.  
  241.       /* Apply shift for lower part of acc and upper part of acc */
  242.       Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  243.  
  244.       /* Every time after the output is computed state should be updated. */
  245.       /* The states should be updated as:  */
  246.       /* Xn2 = Xn1    */
  247.       /* Xn1 = Xn     */
  248.       /* Yn2 = Yn1    */
  249.       /* Yn1 = acc    */
  250.       Xn2 = Xn1;
  251.       Xn1 = Xn;
  252.  
  253.       /* Store the output in the destination buffer. */
  254.       *pOut++ = Yn1;
  255.  
  256.       /* decrement the loop counter */
  257.       sample--;
  258.     }
  259.  
  260.     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
  261.      ** No loop unrolling is used. */
  262.     sample = (blockSize & 0x3u);
  263.  
  264.     while(sample > 0u)
  265.     {
  266.       /* Read the input */
  267.       Xn = *pIn++;
  268.  
  269.       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  270.  
  271.       /* acc =  b0 * x[n] */
  272.       acc = (q63_t) b0 *Xn;
  273.       /* acc +=  b1 * x[n-1] */
  274.       acc += (q63_t) b1 *Xn1;
  275.       /* acc +=  b[2] * x[n-2] */
  276.       acc += (q63_t) b2 *Xn2;
  277.       /* acc +=  a1 * y[n-1] */
  278.       acc += (q63_t) a1 *Yn1;
  279.       /* acc +=  a2 * y[n-2] */
  280.       acc += (q63_t) a2 *Yn2;
  281.  
  282.       /* The result is converted to 1.31  */
  283.       acc = acc >> lShift;
  284.  
  285.       /* Every time after the output is computed state should be updated. */
  286.       /* The states should be updated as:  */
  287.       /* Xn2 = Xn1    */
  288.       /* Xn1 = Xn     */
  289.       /* Yn2 = Yn1    */
  290.       /* Yn1 = acc    */
  291.       Xn2 = Xn1;
  292.       Xn1 = Xn;
  293.       Yn2 = Yn1;
  294.       Yn1 = (q31_t) acc;
  295.  
  296.       /* Store the output in the destination buffer. */
  297.       *pOut++ = (q31_t) acc;
  298.  
  299.       /* decrement the loop counter */
  300.       sample--;
  301.     }
  302.  
  303.     /*  The first stage goes from the input buffer to the output buffer. */
  304.     /*  Subsequent stages occur in-place in the output buffer */
  305.     pIn = pDst;
  306.  
  307.     /* Reset to destination pointer */
  308.     pOut = pDst;
  309.  
  310.     /*  Store the updated state variables back into the pState array */
  311.     *pState++ = Xn1;
  312.     *pState++ = Xn2;
  313.     *pState++ = Yn1;
  314.     *pState++ = Yn2;
  315.  
  316.   } while(--stage);
  317.  
  318. #else
  319.  
  320.   /* Run the below code for Cortex-M0 */
  321.  
  322.   do
  323.   {
  324.     /* Reading the coefficients */
  325.     b0 = *pCoeffs++;
  326.     b1 = *pCoeffs++;
  327.     b2 = *pCoeffs++;
  328.     a1 = *pCoeffs++;
  329.     a2 = *pCoeffs++;
  330.  
  331.     /* Reading the state values */
  332.     Xn1 = pState[0];
  333.     Xn2 = pState[1];
  334.     Yn1 = pState[2];
  335.     Yn2 = pState[3];
  336.  
  337.     /*      The variables acc holds the output value that is computed:        
  338.      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]        
  339.      */
  340.  
  341.     sample = blockSize;
  342.  
  343.     while(sample > 0u)
  344.     {
  345.       /* Read the input */
  346.       Xn = *pIn++;
  347.  
  348.       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  349.       /* acc =  b0 * x[n] */
  350.       acc = (q63_t) b0 *Xn;
  351.  
  352.       /* acc +=  b1 * x[n-1] */
  353.       acc += (q63_t) b1 *Xn1;
  354.       /* acc +=  b[2] * x[n-2] */
  355.       acc += (q63_t) b2 *Xn2;
  356.       /* acc +=  a1 * y[n-1] */
  357.       acc += (q63_t) a1 *Yn1;
  358.       /* acc +=  a2 * y[n-2] */
  359.       acc += (q63_t) a2 *Yn2;
  360.  
  361.       /* The result is converted to 1.31  */
  362.       acc = acc >> lShift;
  363.  
  364.       /* Every time after the output is computed state should be updated. */
  365.       /* The states should be updated as:  */
  366.       /* Xn2 = Xn1    */
  367.       /* Xn1 = Xn     */
  368.       /* Yn2 = Yn1    */
  369.       /* Yn1 = acc    */
  370.       Xn2 = Xn1;
  371.       Xn1 = Xn;
  372.       Yn2 = Yn1;
  373.       Yn1 = (q31_t) acc;
  374.  
  375.       /* Store the output in the destination buffer. */
  376.       *pOut++ = (q31_t) acc;
  377.  
  378.       /* decrement the loop counter */
  379.       sample--;
  380.     }
  381.  
  382.     /*  The first stage goes from the input buffer to the output buffer. */
  383.     /*  Subsequent stages occur in-place in the output buffer */
  384.     pIn = pDst;
  385.  
  386.     /* Reset to destination pointer */
  387.     pOut = pDst;
  388.  
  389.     /*  Store the updated state variables back into the pState array */
  390.     *pState++ = Xn1;
  391.     *pState++ = Xn2;
  392.     *pState++ = Yn1;
  393.     *pState++ = Yn2;
  394.  
  395.   } while(--stage);
  396.  
  397. #endif /*  #ifndef ARM_MATH_CM0_FAMILY_FAMILY */
  398. }
  399.  
  400.  
  401.  
  402.  
  403. /**    
  404.   * @} end of BiquadCascadeDF1 group    
  405.   */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement