Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ;; Bare-metal startup code for Cortex-A8 on Beagle Board
- ;;
- ;; Vector table, reset handler, stacks, interrupt handler, cache & MMU config, NEON enable.
- ;;
- ;; Copyright ARM Ltd 2002-2012. All rights reserved.
- ; Standard definitions of mode bits and interrupt (I & F) flags in PSRs
- Mode_USR EQU 0x10
- Mode_FIQ EQU 0x11
- Mode_IRQ EQU 0x12
- Mode_SVC EQU 0x13
- Mode_ABT EQU 0x17
- Mode_UND EQU 0x1B
- Mode_SYS EQU 0x1F
- I_Bit EQU 0x80 ; when I bit is set, IRQ is disabled
- F_Bit EQU 0x40 ; when F bit is set, FIQ is disabled
- PRESERVE8
- AREA VECTORS, CODE, READONLY ; name this block of code
- ENTRY
- ; StartHere is the Entry point for the Reset handler
- EXPORT StartHere
- StartHere
- ;***********************
- ; Exception Vector Table
- ;***********************
- ; Note: LDR PC instructions are used here, though branch (B) instructions
- ; could also be used, unless the exception handlers are >32MB away.
- Vectors
- LDR PC, Reset_Addr
- LDR PC, Undefined_Addr
- LDR PC, SVC_Addr
- LDR PC, Prefetch_Addr
- LDR PC, Abort_Addr
- B . ; Reserved vector
- LDR PC, IRQ_Addr
- LDR PC, FIQ_Addr
- Reset_Addr DCD Reset_Handler
- Undefined_Addr DCD Undefined_Handler
- SVC_Addr DCD SVC_Handler
- Prefetch_Addr DCD Prefetch_Handler
- Abort_Addr DCD Abort_Handler
- IRQ_Addr DCD IRQ_Handler
- FIQ_Addr DCD FIQ_Handler
- ;***********************
- ; Exception Handlers
- ;***********************
- ; The following dummy handlers do not do anything useful in this example.
- ; They are set up here for completeness.
- Undefined_Handler
- B Undefined_Handler
- SVC_Handler
- B SVC_Handler
- Prefetch_Handler
- B Prefetch_Handler
- Abort_Handler
- B Abort_Handler
- ;;IRQ_Handler
- ;; implemented below
- FIQ_Handler
- B FIQ_Handler
- IRQ_Handler
- B IRQ_Handler
- ;***********************
- ; Interrupt Handler
- ;***********************
- ; This simple example is only able to handle interrupts one after another.
- ; For a more complex nested (re-entrant) interrupt handler example, refer to the documentation
- ; IMPORT C_interrupt_handler
- ;IRQ_Handler FUNCTION {r0-r12}
- ; PUSH {r0-r3, r12, lr}
- ; BL C_interrupt_handler
- ; POP {r0-r3, r12, lr}
- ;SUBS pc, lr, #4
- ;ENDFUNC
- ;***********************
- ; Stack definitions
- ;***********************
- ; Import stack base linker symbols for app and irq from scatter file. Stacks must be 8 byte aligned.
- IMPORT ||Image$$ARM_LIB_STACK$$ZI$$Limit||
- app_stack_base DCD ||Image$$ARM_LIB_STACK$$ZI$$Limit||
- IMPORT ||Image$$IRQ_STACK$$ZI$$Limit||
- irq_stack_base DCD ||Image$$IRQ_STACK$$ZI$$Limit||
- ;***********************
- ; Reset Handler
- ;***********************
- ; Disable cache and MMU, invalidate TLBs, fix vector table, setup stack pointers, setup & enable MMU, enable NEON, enter C library via __main(), then enable cache.
- Reset_Handler FUNCTION {}
- ; Disable caches, MMU and branch prediction in case they were left enabled from an earlier run
- ; This does not need to be done from a cold reset
- MRC p15, 0, r0, c1, c0, 0 ; Read CP15 System Control register
- BIC r0, r0, #(0x1 << 12) ; Clear I bit 12 to disable I Cache
- BIC r0, r0, #(0x1 << 2) ; Clear C bit 2 to disable D Cache
- BIC r0, r0, #0x1 ; Clear M bit 0 to disable MMU
- BIC r0, r0, #(0x1 << 11) ; Clear Z bit 11 to disable branch prediction
- MCR p15, 0, r0, c1, c0, 0 ; Write value back to CP15 System Control register
- ; The MMU is enabled later, before calling main(). Caches and branch prediction are enabled inside main(),
- ; after the MMU has been enabled and scatterloading has been performed.
- ; Invalidate Data and Instruction TLBs and branch predictor
- MOV r0,#0
- MCR p15, 0, r0, c8, c7, 0 ; I-TLB and D-TLB invalidation
- MCR p15, 0, r0, c7, c5, 6 ; BPIALL - Invalidate entire branch predictor array
- ; Set Vector Base Address Register (VBAR) to point to this application's vector table
- LDR r0, =Vectors
- MCR p15, 0, r0, c12, c0, 0
- ; Enter each mode used in turn to disable interrupts and set up the stack pointer
- ; In this simple example, only SVC and IRQ modes are used
- ; Stack pointers must be 8 byte aligned
- MSR CPSR_c, #Mode_IRQ :OR: I_Bit :OR: F_Bit
- LDR r0, irq_stack_base
- MOV sp, r0
- MSR CPSR_c, #Mode_SVC :OR: I_Bit :OR: F_Bit
- LDR r0, app_stack_base
- MOV sp, r0
- ; Continue in SVC mode
- ;==================================================================
- ; Cache Invalidation code for Cortex-A8
- ;==================================================================
- ; Invalidate L1 Instruction Cache
- MRC p15, 1, r0, c0, c0, 1 ; Read Cache Level ID Register (CLIDR)
- TST r0, #0x3 ; Harvard Cache?
- MOV r0, #0 ; SBZ
- MCRNE p15, 0, r0, c7, c5, 0 ; ICIALLU - Invalidate instruction cache and flush branch target cache
- ; Invalidate Data/Unified Caches
- MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR
- ANDS r3, r0, #0x07000000 ; Extract coherency level
- MOV r3, r3, LSR #23 ; Total cache levels << 1
- BEQ Finished ; If 0, no need to clean
- MOV r10, #0 ; R10 holds current cache level << 1
- Loop1 ADD r2, r10, r10, LSR #1 ; R2 holds cache "Set" position
- MOV r1, r0, LSR r2 ; Bottom 3 bits are the Cache-type for this level
- AND r1, r1, #7 ; Isolate those lower 3 bits
- CMP r1, #2
- BLT Skip ; No cache or only instruction cache at this level
- MCR p15, 2, r10, c0, c0, 0 ; Write the Cache Size selection register
- ISB ; ISB to sync the change to the CacheSizeID reg
- MRC p15, 1, r1, c0, c0, 0 ; Reads current Cache Size ID register
- AND r2, r1, #7 ; Extract the line length field
- ADD r2, r2, #4 ; Add 4 for the line length offset (log2 16 bytes)
- LDR r4, =0x3FF
- ANDS r4, r4, r1, LSR #3 ; R4 is the max number on the way size (right aligned)
- CLZ r5, r4 ; R5 is the bit position of the way size increment
- LDR r7, =0x7FFF
- ANDS r7, r7, r1, LSR #13 ; R7 is the max number of the index size (right aligned)
- Loop2 MOV r9, r4 ; R9 working copy of the max way size (right aligned)
- Loop3 ORR r11, r10, r9, LSL r5 ; Factor in the Way number and cache number into R11
- ORR r11, r11, r7, LSL r2 ; Factor in the Set number
- MCR p15, 0, r11, c7, c6, 2 ; Invalidate by Set/Way
- SUBS r9, r9, #1 ; Decrement the Way number
- BGE Loop3
- SUBS r7, r7, #1 ; Decrement the Set number
- BGE Loop2
- Skip ADD r10, r10, #2 ; Increment the cache number
- CMP r3, r10
- BGT Loop1
- Finished
- ;===================================================================
- ; Cortex-A8 MMU Configuration
- ; Set translation table base
- ;===================================================================
- IMPORT ||Image$$APP_CODE$$Base|| ; From scatter file
- IMPORT ||Image$$TTB$$ZI$$Base|| ; from scatter file
- ; Cortex-A8 supports two translation tables
- ; Configure translation table base (TTB) control register cp15,c2
- ; to a value of all zeros, indicates we are using TTB register 0.
- MOV r0,#0x0
- MCR p15, 0, r0, c2, c0, 2
- ; write the address of our page table base to TTB register 0
- LDR r0,=||Image$$TTB$$ZI$$Base||
- MCR p15, 0, r0, c2, c0, 0
- ;===================================================================
- ; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables
- ;
- ; AP[11:10] - Access Permissions = b11, Read/Write Access
- ; Domain[8:5] - Domain = b1111, Domain 15
- ; Type[1:0] - Descriptor Type = b10, 1MB descriptors
- ;
- ; TEX C B
- ; 000 0 0 Strongly Ordered
- ; 000 1 1 Outer and Inner write back, no Write-allocate.
- ;===================================================================
- LDR r1,=0xfff ; loop counter
- LDR r2,=2_00000000000000000000110111100010
- ; r0 contains the address of the translation table base
- ; r1 is loop counter
- ; r2 is level1 descriptor (bits 19:0)
- ; use loop counter to create 4096 individual table entries.
- ; this writes from address 'Image$$TTB$$ZI$$Base' +
- ; offset 0x3FFC down to offset 0x0 in word steps (4 bytes)
- init_ttb_1
- ORR r3, r2, r1, LSL#20 ; r3 now contains full level1 descriptor to write
- STR r3, [r0, r1, LSL#2] ; str table entry at TTB base + loopcount*4
- SUBS r1, r1, #1 ; decrement loop counter
- BPL init_ttb_1
- ; In this example, the 1MB section based at '||Image$$APP_CODE$$Base||' is setup specially as cacheable (write back mode).
- ; TEX[14:12]=000 and CB[3:2]= 11, Outer and inner write back, no Write-allocate normal memory.
- LDR r1,=||Image$$APP_CODE$$Base|| ; Base physical address of code segment
- LSR r1,#20 ; Shift right to align to 1MB boundaries
- ORR r3, r2, r1, LSL#20 ; Setup the initial level1 descriptor again
- ORR r3,r3,#2_0000000001100 ; Set CB bits
- STR r3, [r0, r1, LSL#2] ; str table entry
- ;===================================================================
- ; Setup domain control register - Enable all domains to client mode
- ;===================================================================
- MRC p15, 0, r0, c3, c0, 0 ; Read Domain Access Control Register
- LDR r0, =0x55555555 ; Initialize every domain entry to b01 (client)
- MCR p15, 0, r0, c3, c0, 0 ; Write Domain Access Control Register
- ;===================================================================
- ; Setup L2 Cache - L2 Cache Auxiliary Control
- ;===================================================================
- ;; Seems to undef on Beagle ?
- ;; MOV r0, #0
- ;; MCR p15, 1, r0, c9, c0, 2 ; Write L2 Auxilary Control Register
- IF {TARGET_FEATURE_NEON} || {TARGET_FPU_VFP}
- ;==================================================================
- ; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11.
- ; Enables Full Access i.e. in both privileged and non privileged modes
- ;==================================================================
- MRC p15, 0, r0, c1, c0, 2 ; Read Coprocessor Access Control Register (CPACR)
- ORR r0, r0, #(0xF << 20) ; Enable access to CP 10 & 11
- MCR p15, 0, r0, c1, c0, 2 ; Write Coprocessor Access Control Register (CPACR)
- ISB
- ;==================================================================
- ; Switch on the VFP and NEON hardware
- ;=================================================================
- MOV r0, #0x40000000
- VMSR FPEXC, r0 ; Write FPEXC register, EN bit set
- ENDIF
- ;===================================================================
- ; Enable MMU and Branch to __main
- ; Leaving the caches disabled until after scatter loading.
- ;===================================================================
- IMPORT __main ; Before MMU enabled import label to __main
- LDR r12,=__main ; save this in register for possible long jump
- MRC p15, 0, r0, c1, c0, 0 ; Read CP15 System Control register
- BIC r0, r0, #(0x1 << 12) ; Clear I bit 12 to disable I Cache
- BIC r0, r0, #(0x1 << 2) ; Clear C bit 2 to disable D Cache
- BIC r0, r0, #0x2 ; Clear A bit 1 to disable strict alignment fault checking
- ORR r0, r0, #0x1 ; Set M bit 0 to enable MMU before scatter loading
- MCR p15, 0, r0, c1, c0, 0 ; Write CP15 System Control register
- ; Now the MMU is enabled, virtual to physical address translations will occur.
- ; This will affect the next instruction fetches.
- ;
- ; The two instructions currently in the ARM pipeline will have been fetched before the MMU was enabled.
- ; The branch to __main is safe because the Virtual Address (VA) is the same as the Physical Address (PA)
- ; (flat mapping) of this code that enables the MMU and performs the branch
- BX r12 ; Branch to __main() C library entry point
- ENDFUNC
- ;==================================================================
- ; Enable caches and branch prediction
- ; This code must be run from a privileged mode
- ;==================================================================
- AREA ENABLECACHES, CODE, READONLY
- EXPORT enable_caches
- enable_caches FUNCTION
- ;==================================================================
- ; Enable caches and branch prediction
- ;==================================================================
- MRC p15, 0, r0, c1, c0, 0 ; Read System Control Register
- ORR r0, r0, #(0x1 << 12) ; Set I bit 12 to enable I Cache
- ORR r0, r0, #(0x1 << 2) ; Set C bit 2 to enable D Cache
- ORR r0, r0, #(0x1 << 11) ; Set Z bit 11 to enable branch prediction
- MCR p15, 0, r0, c1, c0, 0 ; Write System Control Register
- ;==================================================================
- ; Enable Cortex-A8 Level2 Unified Cache
- ;==================================================================
- MRC p15, 0, r0, c1, c0, 1 ; Read Auxiliary Control Register
- ORR r0, #2 ; L2EN bit, enable L2 cache
- MCR p15, 0, r0, c1, c0, 1 ; Write Auxiliary Control Register
- BX lr
- ENDFUNC
- EXPORT disable_caches
- disable_caches FUNCTION
- MRC p15, 0, r0, c1, c0, 0 ; Read CP15 System Control register
- BIC r0, r0, #(0x1 << 12) ; Clear I bit 12 to disable I Cache
- BIC r0, r0, #(0x1 << 2) ; Clear C bit 2 to disable D Cache
- MCR p15, 0, r0, c1, c0, 0 ; Write CP15 System Control register
- BX lr
- ENDFUNC
- END
Advertisement
Add Comment
Please, Sign In to add comment