Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- ASM optimization strategy:
- Chan A handler:
- Load r0 = a_i4, ip = a_i5, lr = a_i6, r1 = a_i7 - they will be in LS side halfwords.
- Get r0 = b_i0, ip = b_i1, lr = b_i2, r1 = b_i3 at the same time for free (in MS side halfwords.)
- Zero MS side of r11.
- Sum halfword-wise r0+ip+lr+r1+r11, store in r11.
- sum(a_i0..7) is in LS side of r11
- sum(b_i0..3) is in MS side of r11.
- zero extend LS side of r11 to r0, which will be int32_t i_avg.
- Chan B handler
- Load r0 = a_i0, ip = a_i1, lr = a_i2, r1 = a_i3 - they will be in LS side halfwords.
- Get r0 = b_i4, ip = b_i5, lr = b_i6, r1 = b_i7 at the same time for free (in MS side halfwords.)
- Zero LS side of r11.
- Sum halfword-wise r0+ip+lr+r1+r11, store in r11.
- sum(a_i0..3) is in LS side of r11
- sum(b_i0..7) is in MS side of r11 (b_i4..7 added on top of the b_i0..3 sum left there by the Chan A handler).
- zero extend MS side of r11 to r0, which will be int32_t i_avg.
- COMPILED WITH -ffixed-r11
- */
- void chan_a_loop_cha_handler()
- {
- int time = TIM2->CNT;
- // Compiled with -Wno-strict-aliasing to suppress warnings:
- register uint32_t b0a4 asm("r0") = *((uint32_t*)&adc_data.a_i4);
- register uint32_t b1a5 asm("ip") = *((uint32_t*)&adc_data.a_i5);
- register uint32_t b2a6 asm("lr") = *((uint32_t*)&adc_data.a_i6);
- register int32_t i_avg asm("r0");
- asm volatile
- ("bfc r11, #16, #16\n\t"
- "uadd16 r11, r11, r0\n\t"
- "uadd16 r11, r11, ip\n\t"
- "uadd16 r11, r11, lr"
- /* outputs: */ :
- /* inputs: */ : "r" (b0a4), "r" (b1a5), "r" (b2a6)
- /* clobbers: */ : );
- register uint32_t b3a7 asm("r1") = *((uint32_t*)&adc_data.a_i7);
- asm volatile
- (
- "uadd16 r11, r11, r1\n\t"
- "uxth r0, r11"
- /* outputs: */ : "=r" (i_avg)
- /* inputs: */ : "r" (b3a7)
- /* clobbers: */ : );
- i_avg -= chans[0].cm_corr_table[(((uint16_t)chans[0].last_vdir)&0b1111110000000000)>>10];
- avg_i_accum[0] += i_avg;
- //unrelated stuff truncated
- }
- void chan_b_loop_cha_handler()
- {
- int time = TIM2->CNT;
- register uint32_t b4a0 asm("r0") = *((uint32_t*)&adc_data.a_i0);
- register uint32_t b5a1 asm("ip") = *((uint32_t*)&adc_data.a_i1);
- register uint32_t b6a2 asm("lr") = *((uint32_t*)&adc_data.a_i2);
- register int32_t i_avg asm("r0");
- asm volatile
- ("bfc r11, #0, #16\n\t"
- "uadd16 r11, r11, r0\n\t"
- "uadd16 r11, r11, ip\n\t"
- "uadd16 r11, r11, lr"
- /* outputs: */ :
- /* inputs: */ : "r" (b4a0), "r" (b5a1), "r" (b6a2)
- /* clobbers: */ : );
- register uint32_t b7a3 asm("r1") = *((uint32_t*)&adc_data.a_i3);
- asm volatile
- ("uadd16 r11, r11, r1\n\t"
- "uxth r0, r11, ROR #16"
- /* outputs: */ : "=r" (i_avg)
- /* inputs: */ : "r" (b7a3)
- /* clobbers: */ : );
- i_avg -= chans[1].cm_corr_table[(((uint16_t)chans[1].last_vdir)&0b1111110000000000)>>10];
- avg_i_accum[1] += i_avg;
- //unrelated stuff truncated
- }
- main()
- {
- . . .
- ADC1_2->CCR = 1UL<<23 /*Temp sensor enable*/ | 0b01UL<<16 /*clk=HCLK*/ |
- 0b10UL<<14 /*Dual mode DMA*/ | 1UL<<13 /*Dual mode DMA circular*/ |
- 0b00110UL /*Regular simultaneous dual mode*/;
- DMA1_Channel1->CPAR = (uint32_t)&(ADC1_2->CDR); // common 32-bit data register for dual-mode ADC
- DMA1_Channel1->CMAR = (uint32_t)(&adc_data);
- DMA1_Channel1->CNDTR = DMA_INITIAL_CNDTR;
- DMA1_Channel1->CCR = 0b11UL<<12 /*highest prio*/ | 0b10UL<<10 /*32b mem*/ | 0b10UL<<8 /*32b periph*/ |
- 1UL<<7 /*memory increment*/ | 1UL<<5 /*circular*/;
- . . .
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement