We are running a survey to help us improve the experience for all of our members. If you see the survey appear, please take the time to tell us about your experience if you can.
I am working on writing a minimalistic bare metal kernel for the raspberry pi 5 (having arm cortex a76). Everything was going on smoothly, until I came to enabling the MMU. I am not sure what I am missing here, but I think I am unable to initialise the translation tables for the MMU appropriately. Here is my bootstrap code for the kernel:
#include "system_registers.h" #include "mmu.h" .section ".text.boot" .global _start _start: mrs x7, mpidr_el1 // Multi-Processor Identifier(EL1) lsr x7, x7, #8 // Shift the identifier to the right by 8 bits. and x7, x7, #7 // Last 3 bits of the identifier extracted. Indicates CPU ID. cbz x7, intr_init // Proceed if on primary core, else wait for the primary core to initialize. // The default stub for the Raspberry Pi 5 looks for the first instruction at x80000, // and runs it on the primary core of the processor (generally core 0), and parks all // the remaining cores. If the address of a function is written onto the register at // which the respective core expects it, the core is then woken up from sleep to // execute that function. // delay for the secondary cores to wait for the primary core to initialize mov x0, #10000 core_init_delay: sub x0, x0, #1 cbz x0, intr_init b core_init_delay core_hang: wfe // Wait for event, and loop indefinitely b core_hang // Initialise required system registers required before jumping into EL1 exception level. intr_init: ldr x0, =SCTLR_VALUE_MMU_DISABLED msr sctlr_el1, x0 ldr x0, =HCR_VALUE msr hcr_el2, x0 ldr x0, =TCR_VALUE msr tcr_el1, x0 ldr x0, =MAIR_VALUE msr mair_el1, x0 ldr x0, =SPSR_VALUE msr spsr_el2, x0 adr x0, bss_init msr elr_el2, x0 eret bss_init: adrp x1, __bss_start // Load the start address of the BSS section, defined in the linker. adrp x2, __bss_size // Load the end address of the BSS section, defined in the linker. bss_loop: cbz x2, stack_init // Skip initialization of BSS variables if the size is `0` str xzr, [x1], #8 // Initialize all the BSS variables to `0` sub x2, x2, #1 cbnz x2, bss_loop // Iterate for all variables of the BSS section stack_init: bl __create_page_tables adrp x0, __user_end mov sp, x0 adrp x0, id_pg_dir msr ttbr0_el1, x0 adrp x0, high_pg_dir msr ttbr1_el1, x0 mrs x0, sctlr_el1 ldr x1, =SCTLR_MMU_ENABLED orr x0, x0, x1 msr sctlr_el1, x0 dsb sy mov x0, #0 // Pass core ID as an argument to the `kernel_main()` routine of the kernel bl kernel_main // Link and jump to the `kernel_main()` routine of the kernel b core_hang // If the `kernel_main()` routine of the kernel returns, loop indefinitely .macro create_table_entry, table, next_table, va_start, shift, t1, t2 lsr \t1, \va_start, #\shift // Shift the virtual address to the right by the shift value and \t1, \t1, #ENTRIES_PER_TABLE - 1 // Extract the index of the entry in the table, by masking the bits to the left of the index. mov \t2, \next_table // Load the address of the next table into a register orr \t2, \t2, #MM_TYPE_PAGE_TABLE // Set the type of the table to a page table str \t2, [\table, \t1, lsl #3] // Store the table descriptor entry in the table at the calculated index .endm .macro create_block_map, table, va_start, va_end, pa_start, flags, t1 lsr \va_start, \va_start, #SECTION_SHIFT // Shift the virtual address to the right by the section shift value and \va_start, \va_start, #ENTRIES_PER_TABLE - 1 lsr \va_end, \va_end, #SECTION_SHIFT sub \va_end, \va_end, #1 and \va_end, \va_end, #ENTRIES_PER_TABLE - 1 lsr \pa_start, \pa_start, #SECTION_SHIFT lsl \pa_start, \pa_start, #SECTION_SHIFT mov \t1, \flags orr \pa_start, \pa_start, \t1 9999: str \pa_start, [\table, \va_start, lsl #3] add \va_start, \va_start, #1 add \pa_start, \pa_start, #SECTION_SIZE cmp \va_start, \va_end b.le 9999b .endm __create_page_tables: mov x29, x30 // Save the return address adrp x0, id_pg_dir mov x1, #ID_MAP_TABLE_SIZE bl mem_init_zero adrp x0, id_pg_dir add x1, x0, #PAGE_SIZE eor x4, x4, x4 create_table_entry x0, x1, x4, PGD_SHIFT, x2, x3 add x0, x0, #PAGE_SIZE add x1, x1, #PAGE_SIZE create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 mov x0, x1 eor x2, x2, x2 ldr x3, =ID_MAP_SIZE eor x4, x4, x4 ldr x6, =MMU_KERNEL_FLAGS create_block_map x0, x2, x3, x4, x6, x5 adrp x0, high_pg_dir mov x1, #HIGH_MAP_TABLE_SIZE bl mem_init_zero adrp x0, high_pg_dir add x1, x0, #PAGE_SIZE ldr x4, =VA_START create_table_entry x0, x1, x4, PGD_SHIFT, x2, x3 add x0, x0, #PAGE_SIZE add x1, x1, #PAGE_SIZE ldr x4, =VA_START ldr x5, =PUD_ENTRY_MAP_SIZE create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE ldr x4, =VA_START ldr x5, =PHYSICAL_DEVICE_START add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 ldr x10, =HIGH_MAP_FIRST_START ldr x11, =HIGH_MAP_FIRST_END ldr x12, =HIGH_MAP_SECOND_START ldr x13, =HIGH_MAP_SECOND_END ldr x14, =HIGH_MAP_THIRD_START ldr x15, =HIGH_MAP_THIRD_END ldr x16, =HIGH_MAP_FOURTH_START ldr x17, =HIGH_MAP_FOURTH_END ldr x18, =HIGH_MAP_DEVICE_START ldr x19, =HIGH_MAP_DEVICE_END ldr x20, =PHYSICAL_FIRST_START ldr x21, =PHYSICAL_SECOND_START ldr x22, =PHYSICAL_THIRD_START ldr x23, =PHYSICAL_FOURTH_START ldr x24, =PHYSICAL_DEVICE_START add x0, x0, #PAGE_SIZE mov x2, x10 mov x3, x11 mov x4, x20 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x12 mov x3, x13 mov x4, x21 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x14 mov x3, x15 mov x4, x22 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x16 mov x3, x17 mov x4, x23 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x18 mov x3, x19 mov x4, x24 ldr x6, =MMU_PERIPHERALS_FLAGS create_block_map x0, x2, x3, x4, x6, x5 mov x30, x29 ret
The code here, fails to branch to `kernel_main()` if it branches to `__create_page_tables`, and I don't understand why. I also had UART prints after every statement in the code, so as to check where the execution is hanging, and it turns out only the `bl kernel_main` statement is not taking place. Following is my linker:
SECTIONS { . = 0x80000; /* Start address of the kernel image */ .text : { KEEP(*(.text.boot)) *(.text .text.* .gnu.linkonce.t*) } .rodata : { *(.rodata .rodata.* .gnu.linkonce.r*) } /* PROVIDE -> for variables which are referenced but not initialized in the section */ PROVIDE(_data = .); /* Initialize data start address to current location pointer */ .data : { *(.data .data.* .gnu.linkonce.d*) } /* Section for initialized data */ .bss (NOLOAD) : { /* Section for uninitialized data; NOLOAD -> Don't allocate space in linking, will be allocated and initialized in runtime */ . = ALIGN(16); /* Align the current location pointer to the next 16bit boundary */ __bss_start = .; /* Start address of the BSS section */ *(.bss .bss.*) *(COMMON) /* COMMON -> used for uninitialized global variables that are declared without an explicit section attribute in multiple files. */ __bss_end = .; /* End address of the BSS section */ } . = ALIGN(0x10000); id_pg_dir = .; .data.id_pg_dir : { . += (3 * (1 << 12)); } . = ALIGN(0x10000); high_pg_dir = .; .data.high_pg_dir : { . += (7 * (1 << 12)); } . = ALIGN(0x10000); __user_begin = .; .text.user : { build/user* (.text .text.*) } .rodata.user : { build/user* (.rodata .rodata.*) } .data.user : { build/user* (.data .data.*) } .bss.user : { build/user* (.bss .bss.*) } __user_end = .; _end = .; /* End address of loaded program data */ /* DISCARD certain sections from the final output file, like comments, metadata and debugging information */ /DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) } } /* .gnu.linkonce.d -> used by the GNU linker for certain optimizations * for example, merge identical constants across various files into a single section to save space and access times */ __bss_size = (__bss_end - __bss_start)>>3; /* Size of the BSS section */
also, for reference, the VA_START variable is a macro which resolves to 0x0 (I'm trying out identity mapping as of now...)
I am not sure as to what I am doing wrong, or what exactly I am missing and failing to understand. Any help would be really helpful...Thanks!
I am using a raspberry pi 5, and the arm stub hands over control to the kernel bootstrap code in EL2, hence I stoop down from EL2 to EL1 in the bootstrap.