From 0263a8d9a16fc1e998422f595cd3acaa740e10e4 Mon Sep 17 00:00:00 2001
From: Axel Heider
Date: Fri, 28 Jun 2024 15:41:22 +0200
Subject: [PATCH] boot/smp: use memory order aware builtin functions

C++11 introduced memory-order-aware access functions, which are also
available in C. Use them to let the compiler choose the proper barriers
automatically.

Signed-off-by: Axel Heider
---
 src/arch/arm/kernel/boot.c   | 29 ++++++++++++++++---------
 src/arch/riscv/kernel/boot.c | 41 +++++++++++++++++++++++++++---------
 2 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/src/arch/arm/kernel/boot.c b/src/arch/arm/kernel/boot.c
index 1c8decc70c..9974323be8 100644
--- a/src/arch/arm/kernel/boot.c
+++ b/src/arch/arm/kernel/boot.c
@@ -34,7 +34,7 @@
  * spinning until the primary core has initialized all kernel structures and
  * then set it to 1.
  */
-BOOT_BSS static volatile int node_boot_lock;
+BOOT_BSS static int node_boot_lock;
 #endif /* ENABLE_SMP_SUPPORT */

 BOOT_BSS static region_t reserved[NUM_RESERVED_REGIONS];

@@ -263,8 +263,12 @@ BOOT_CODE static void init_plat(void)
 #ifdef ENABLE_SMP_SUPPORT
 BOOT_CODE static bool_t try_init_kernel_secondary_core(void)
 {
-    /* need to first wait until some kernel init has been done */
-    while (!node_boot_lock);
+    /* Busy wait for the primary node to release the secondary nodes. Using
+     * C11 atomics guarantees proper visibility across threads and cores.
+     */
+    while (0 == __atomic_load_n(&node_boot_lock, __ATOMIC_ACQUIRE)) {
+        /* keep spinning */
+    }

     /* Perform cpu init */
     init_cpu();
@@ -283,7 +287,7 @@ BOOT_CODE static bool_t try_init_kernel_secondary_core(void)
     NODE_LOCK_SYS;

     clock_sync_test();
-    ksNumCPUs++;
+    (void)__atomic_fetch_add(&ksNumCPUs, 1, __ATOMIC_ACQ_REL);

     init_core_state(SchedulerAction_ResumeCurrentThread);

@@ -292,9 +296,11 @@ BOOT_CODE static bool_t try_init_kernel_secondary_core(void)

 BOOT_CODE static void release_secondary_cpus(void)
 {
-    /* release the cpus at the same time */
+    /* Release all nodes at the same time. Update the lock in a way that
+     * ensures the result is visible everywhere.
+     */
     assert(0 == node_boot_lock); /* Sanity check for a proper lock state. */
-    node_boot_lock = 1;
+    __atomic_store_n(&node_boot_lock, 1, __ATOMIC_RELEASE);

     /*
      * At this point in time the primary core (executing this code) already uses
@@ -314,12 +320,14 @@ BOOT_CODE static void release_secondary_cpus(void)
 #endif

     /* Wait until all the secondary cores are done initialising */
-    while (ksNumCPUs != CONFIG_MAX_NUM_NODES) {
+    while (CONFIG_MAX_NUM_NODES != __atomic_load_n(&ksNumCPUs, __ATOMIC_ACQUIRE)) {
 #ifdef ENABLE_SMP_CLOCK_SYNC_TEST_ON_BOOT
+        /* Secondary cores compare their time with our primary core's time, so
+         * keep updating the timestamp while spinning.
+         */
         NODE_STATE(ksCurTime) = getCurrentTime();
+        __atomic_thread_fence(__ATOMIC_RELEASE); /* ensure the write propagates */
 #endif
-        /* perform a memory acquire to get new values of ksNumCPUs, release for ksCurTime */
-        __atomic_thread_fence(__ATOMIC_ACQ_REL);
     }
 }
 #endif /* ENABLE_SMP_SUPPORT */
@@ -607,7 +615,8 @@ static BOOT_CODE bool_t try_init_kernel(
         invalidateHypTLB();
     }

-    ksNumCPUs = 1;
+    /* primary node is available */
+    __atomic_store_n(&ksNumCPUs, 1, __ATOMIC_RELEASE);

     /* initialize BKL before booting up other cores */
     SMP_COND_STATEMENT(clh_lock_init());
diff --git a/src/arch/riscv/kernel/boot.c b/src/arch/riscv/kernel/boot.c
index 34d6f6db61..3144fb4c62 100644
--- a/src/arch/riscv/kernel/boot.c
+++ b/src/arch/riscv/kernel/boot.c
@@ -27,8 +27,8 @@
  * spinning until the primary core has initialized all kernel structures and
  * then set it to 1.
  */
-BOOT_BSS static volatile word_t node_boot_lock;
-#endif
+BOOT_BSS static int node_boot_lock;
+#endif /* ENABLE_SMP_SUPPORT */

 BOOT_BSS static region_t res_reg[NUM_RESERVED_REGIONS];

@@ -153,15 +153,20 @@ BOOT_CODE static void init_plat(void)
 #ifdef ENABLE_SMP_SUPPORT
 BOOT_CODE static bool_t try_init_kernel_secondary_core(word_t hart_id, word_t core_id)
 {
-    while (!node_boot_lock);
+    /* Busy wait for the primary node to release the secondary nodes. Using
+     * C11 atomics guarantees proper visibility across threads and cores.
+     */
+    while (0 == __atomic_load_n(&node_boot_lock, __ATOMIC_ACQUIRE)) {
+        /* keep spinning */
+    }

-    fence_r_rw();
+    fence_r_rw(); /* TODO: is this still necessary? */

     init_cpu();
     NODE_LOCK_SYS;

     clock_sync_test();
-    ksNumCPUs++;
+    (void)__atomic_fetch_add(&ksNumCPUs, 1, __ATOMIC_ACQ_REL);
     init_core_state(SchedulerAction_ResumeCurrentThread);
     ifence_local();
     return true;
@@ -169,21 +174,36 @@ BOOT_CODE static bool_t try_init_kernel_secondary_core(word_t hart_id, word_t co

 BOOT_CODE static void release_secondary_cores(void)
 {
+    /* Release all nodes at the same time. Update the lock in a way that
+     * ensures the result is visible everywhere.
+     */
     assert(0 == node_boot_lock); /* Sanity check for a proper lock state. */
-    node_boot_lock = 1;
+    __atomic_store_n(&node_boot_lock, 1, __ATOMIC_RELEASE);
+
     /* At this point in time the primary core (executing this code) already uses
      * the seL4 MMU/cache setup. However, the secondary cores are still using
      * the elfloader's MMU/cache setup, and thus the update of node_boot_lock
      * may not be visible there if the setups differ. Currently, the mappings
-     * match, so a barrier is all that is needed.
+     * match, so __atomic_store_n() should be sufficient. It generates the
+     * assembler sequence
+     *     fence iorw,ow
+     *     amoswap.w zero,a5,(a3)  // a5 = 1, a3 = &node_boot_lock
+     * which ensures the write really happens and becomes globally visible, so
+     * the secondary harts can boot before we enter the polling loop that
+     * checks ksNumCPUs to determine when all nodes are up. An additional
+     * explicit barrier should no longer be necessary.
      */
     fence_rw_rw();

-    while (ksNumCPUs != CONFIG_MAX_NUM_NODES) {
+    /* Keep spinning until all secondary nodes are up */
+    while (CONFIG_MAX_NUM_NODES != __atomic_load_n(&ksNumCPUs, __ATOMIC_ACQUIRE)) {
 #ifdef ENABLE_SMP_CLOCK_SYNC_TEST_ON_BOOT
+        /* Secondary cores compare their time with our primary core's time, so
+         * keep updating the timestamp while spinning.
+         */
         NODE_STATE(ksCurTime) = getCurrentTime();
+        __atomic_thread_fence(__ATOMIC_RELEASE); /* ensure the write propagates */
 #endif
-        __atomic_thread_fence(__ATOMIC_ACQ_REL);
     }
 }
 #endif /* ENABLE_SMP_SUPPORT */
@@ -437,7 +457,8 @@ static BOOT_CODE bool_t try_init_kernel(
     /* finalise the bootinfo frame */
     bi_finalise();

-    ksNumCPUs = 1;
+    /* primary node is available */
+    __atomic_store_n(&ksNumCPUs, 1, __ATOMIC_RELEASE);

     SMP_COND_STATEMENT(clh_lock_init());
     SMP_COND_STATEMENT(release_secondary_cores());
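
As an illustration of the release/acquire handshake the patch relies on, here
is a minimal, self-contained user-space sketch using the same __atomic
builtins. All names (boot_lock, num_cpus, NUM_NODES) are hypothetical
stand-ins for node_boot_lock, ksNumCPUs and CONFIG_MAX_NUM_NODES, and pthreads
stand in for secondary cores; it models the pattern, it is not kernel code:

    /* Primary "node" releases NUM_NODES - 1 secondaries at once and then
     * waits until all of them have checked in.
     * Build with: gcc -O2 -pthread handshake.c
     */
    #include <pthread.h>
    #include <stdio.h>

    #define NUM_NODES 4 /* stands in for CONFIG_MAX_NUM_NODES */

    static int boot_lock; /* stands in for node_boot_lock */
    static int num_cpus;  /* stands in for ksNumCPUs */

    static void *secondary(void *arg)
    {
        /* acquire-load pairs with the primary's release-store below */
        while (0 == __atomic_load_n(&boot_lock, __ATOMIC_ACQUIRE)) {
            /* keep spinning */
        }
        /* ... per-core init would run here, after the handshake ... */
        (void)__atomic_fetch_add(&num_cpus, 1, __ATOMIC_ACQ_REL);
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NUM_NODES - 1];

        /* primary node is available */
        __atomic_store_n(&num_cpus, 1, __ATOMIC_RELEASE);

        for (int i = 0; i < NUM_NODES - 1; i++) {
            pthread_create(&t[i], NULL, secondary, NULL);
        }

        /* release all secondaries at once; every write sequenced before
         * this release-store is visible to them after their acquire-load */
        __atomic_store_n(&boot_lock, 1, __ATOMIC_RELEASE);

        /* wait until all secondaries have checked in */
        while (NUM_NODES != __atomic_load_n(&num_cpus, __ATOMIC_ACQUIRE)) {
            /* keep spinning */
        }
        printf("all %d nodes up\n", NUM_NODES);

        for (int i = 0; i < NUM_NODES - 1; i++) {
            pthread_join(t[i], NULL);
        }
        return 0;
    }

The point of dropping volatile in favour of this pattern: volatile only stops
the compiler from caching the flag, while the release/acquire pair also orders
the initialisation writes before the flag becomes visible, which is what the
spinning cores actually depend on.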