diff --git a/arch/arm64/core/CMakeLists.txt b/arch/arm64/core/CMakeLists.txt
index 03a34e439bb213..99e7324c2a241a 100644
--- a/arch/arm64/core/CMakeLists.txt
+++ b/arch/arm64/core/CMakeLists.txt
@@ -56,6 +56,7 @@ if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
 endif()
 
 add_subdirectory_ifdef(CONFIG_XEN xen)
+add_subdirectory_ifdef(CONFIG_ZVM zvm)
 
 if(CONFIG_GEN_SW_ISR_TABLE)
   if(CONFIG_DYNAMIC_INTERRUPTS)
diff --git a/arch/arm64/core/Kconfig b/arch/arm64/core/Kconfig
index 367480015c4b1f..84afb86848137a 100644
--- a/arch/arm64/core/Kconfig
+++ b/arch/arm64/core/Kconfig
@@ -85,6 +85,13 @@ config HAS_ARM_SMCCC
 	  Include support for the Secure Monitor Call (SMC) and Hypervisor
 	  Call (HVC) instructions on Armv7 and above architectures.
 
+config HAS_ARM_VHE
+	bool
+	default y if CPU_CORTEX_A55 || CPU_CORTEX_A76 || CPU_CORTEX_A76_A55
+	help
+	  This option indicates that the Virtualization Host Extensions (VHE)
+	  introduced in Armv8.1 are enabled on this platform.
+
 config NUM_IRQS
 	int
 
@@ -219,6 +226,7 @@ config ARMV8_A
 	  It supports the T32 and A32 instruction sets.
 
 rsource "xen/Kconfig"
+rsource "zvm/Kconfig"
 
 endif # CPU_CORTEX_A
 
@@ -273,6 +281,14 @@ config ARM64_SET_VMPIDR_EL2
 	  This register may already be set by bootloader at the EL2 stage,
 	  if not, Zephyr should set it.
 
+config ARM64_SET_VPIDR_EL2
+	bool "Set VPIDR_EL2 at EL2 stage"
+	help
+	  VPIDR_EL2 holds the Virtualization Processor ID; this is the value
+	  returned by EL1 reads of MIDR_EL1.
+	  This register may already be set by the bootloader at the EL2 stage;
+	  if not, Zephyr should set it.
+
 if ARM_MMU
 
 config MMU_PAGE_SIZE
diff --git a/arch/arm64/core/isr_wrapper.S b/arch/arm64/core/isr_wrapper.S
index 809762f27f96b0..e57d9d18151acd 100644
--- a/arch/arm64/core/isr_wrapper.S
+++ b/arch/arm64/core/isr_wrapper.S
@@ -98,7 +98,7 @@ oob:
 	msr daifset, #(DAIFSET_IRQ_BIT)
 
 	/* Signal end-of-interrupt */
-	ldp x0, xzr, [sp], #16
+	ldp x0, xzr, [sp]
 
 spurious_continue:
 #if !defined(CONFIG_ARM_CUSTOM_INTERRUPT_CONTROLLER)
@@ -107,6 +107,18 @@ spurious_continue:
 	bl z_soc_irq_eoi
 #endif /* !CONFIG_ARM_CUSTOM_INTERRUPT_CONTROLLER */
 
+	ldp x0, xzr, [sp]
+#ifdef CONFIG_ZVM
+	/* pre-process a possible vCPU switch */
+	bl zvm_switch_handle_pre
+	mov x1, x0
+	ldp x0, xzr, [sp], #16
+	/* a passthrough device may also need a deactivate */
+	bl arm_gic_eoi_deactive
+#else
+	ldp x0, xzr, [sp], #16
+#endif
+
 #ifdef CONFIG_TRACING
 	bl sys_trace_isr_exit
 #endif
diff --git a/arch/arm64/core/mmu.c b/arch/arm64/core/mmu.c
index f0abb4a029a690..f5353d5966509b 100644
--- a/arch/arm64/core/mmu.c
+++ b/arch/arm64/core/mmu.c
@@ -965,13 +965,19 @@ void z_arm64_mm_init(bool is_primary_core)
 	__ASSERT(CONFIG_MMU_PAGE_SIZE == KB(4),
 		 "Only 4K page size is supported\n");
 
+#if defined(CONFIG_ZVM) && defined(CONFIG_HAS_ARM_VHE)
+	__ASSERT(GET_EL(read_currentel()) == MODE_EL2,
+		 "Exception level not EL2, MMU not enabled!\n");
+
+	/* Ensure the MMU is not already enabled */
+	__ASSERT((read_sctlr_el2() & SCTLR_M_BIT) == 0, "MMU is already enabled\n");
+#else
 	__ASSERT(GET_EL(read_currentel()) == MODE_EL1,
 		 "Exception level not EL1, MMU not enabled!\n");
 
 	/* Ensure the MMU is not already enabled */
 	__ASSERT((read_sctlr_el1() & SCTLR_M_BIT) == 0, "MMU is already enabled\n");
-
+#endif
 	/*
 	 * Only the booting core sets up the page tables.
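 	 * Secondary cores do not build tables of their own; when they come
 	 * up (z_arm64_mm_init(false) from smp.c below) they enable their MMU
 	 * against the tables created here.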
*/ diff --git a/arch/arm64/core/offsets/offsets.c b/arch/arm64/core/offsets/offsets.c index 772f0df3a8d05d..0ca54df941e761 100644 --- a/arch/arm64/core/offsets/offsets.c +++ b/arch/arm64/core/offsets/offsets.c @@ -77,6 +77,17 @@ GEN_NAMED_OFFSET_SYM(arm_smccc_res_t, a6, a6_a7); #endif /* CONFIG_HAS_ARM_SMCCC */ +#ifdef CONFIG_ZVM +GEN_OFFSET_SYM(zvm_vcpu_context_t, regs); +GEN_OFFSET_SYM(vcpu_t, arch); +GEN_OFFSET_SYM(vcpu_arch_t, ctxt); +GEN_OFFSET_SYM(arch_commom_regs_t, callee_saved_regs); +GEN_OFFSET_SYM(arch_commom_regs_t, esf_handle_regs); +GEN_OFFSET_SYM(arch_commom_regs_t, pc); +GEN_OFFSET_SYM(arch_commom_regs_t, pstate); +GEN_OFFSET_SYM(arch_commom_regs_t, lr); +#endif /* CONFIG_ZVM */ + GEN_ABS_SYM_END #endif /* _ARM_OFFSETS_INC_ */ diff --git a/arch/arm64/core/prep_c.c b/arch/arm64/core/prep_c.c index 1aca46f23767e8..a8fa9229b25d37 100644 --- a/arch/arm64/core/prep_c.c +++ b/arch/arm64/core/prep_c.c @@ -32,7 +32,7 @@ void z_prep_c(void) { /* Initialize tpidrro_el0 with our struct _cpu instance address */ write_tpidrro_el0((uintptr_t)&_kernel.cpus[0]); - + arch_set_cpu_id_elx(); z_bss_zero(); z_data_copy(); #ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK diff --git a/arch/arm64/core/reset.S b/arch/arm64/core/reset.S index a01139ad700916..622f42b1ea8388 100644 --- a/arch/arm64/core/reset.S +++ b/arch/arm64/core/reset.S @@ -70,6 +70,10 @@ SECTION_SUBSEC_FUNC(TEXT,_reset_section,__reset_prep_c) /* Set SP_EL1 */ msr sp_el1, x24 +#if defined(CONFIG_HAS_ARM_VHE) + msr SPsel, #1 + mov sp, x24 +#endif b out 1: /* Disable alignment fault checking */ @@ -238,6 +242,13 @@ switch_el: mov_imm x0, (SPSR_DAIF_MASK | SPSR_MODE_EL1T) msr spsr_el2, x0 + /* Is VHE mode? */ + mrs x0, hcr_el2 + and x0, x0, #HCR_E2H_BIT + cbz x0, nvhe_branch + b 1f + +nvhe_branch: adr x0, 1f msr elr_el2, x0 eret diff --git a/arch/arm64/core/reset.c b/arch/arm64/core/reset.c index 03cf389007d949..35d31e129b338f 100644 --- a/arch/arm64/core/reset.c +++ b/arch/arm64/core/reset.c @@ -8,6 +8,8 @@ #include #include "boot.h" +uint64_t cpu_vmpidr_el2_list[CONFIG_MP_NUM_CPUS] = {0}; + void z_arm64_el2_init(void); void __weak z_arm64_el_highest_plat_init(void) @@ -151,11 +153,32 @@ void z_arm64_el2_init(void) zero_cnthp_ctl_el2(); #endif +#ifdef CONFIG_ARM64_SET_VPIDR_EL2 + reg = read_midr_el1(); + write_vpidr_el2(reg); +#endif + #ifdef CONFIG_ARM64_SET_VMPIDR_EL2 reg = read_mpidr_el1(); write_vmpidr_el2(reg); #endif +#if defined(CONFIG_ZVM) && defined(CONFIG_HAS_ARM_VHE) + reg = read_hcr_el2(); + reg |= HCR_VHE_FLAGS; + write_hcr_el2(reg); + + reg = read_mpidr_el1(); + cpu_vmpidr_el2_list[MPIDR_TO_CORE(GET_MPIDR())] = reg; + + /* Disable CP15 trapping to EL2 of EL1 accesses to System register */ + zero_sysreg(hstr_el2); + /* Disable Debug related register */ + zero_sysreg(mdcr_el2); + /* Init stage-2 translation table base register */ + zero_sysreg(vttbr_el2); +#endif + /* * Enable this if/when we use the hypervisor timer. * write_cnthp_cval_el2(~(uint64_t)0); @@ -187,6 +210,7 @@ void z_arm64_el1_init(void) write_sctlr_el1(reg); write_cntv_cval_el0(~(uint64_t)0); + write_cntp_cval_el0(~(uint64_t)0); /* * Enable these if/when we use the corresponding timers. 
 	 * write_cntp_cval_el0(~(uint64_t)0);
diff --git a/arch/arm64/core/smp.c b/arch/arm64/core/smp.c
index bbb7f9634317d8..3a360cc8dc307e 100644
--- a/arch/arm64/core/smp.c
+++ b/arch/arm64/core/smp.c
@@ -144,7 +144,7 @@ void arch_secondary_cpu_init(int cpu_num)
 
 	/* Initialize tpidrro_el0 with our struct _cpu instance address */
 	write_tpidrro_el0((uintptr_t)&_kernel.cpus[cpu_num]);
-
+	arch_set_cpu_id_elx();
 	z_arm64_mm_init(false);
 
 #ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK
diff --git a/arch/arm64/core/thread.c b/arch/arm64/core/thread.c
index 18f49945eda495..9ec8c81283f7b5 100644
--- a/arch/arm64/core/thread.c
+++ b/arch/arm64/core/thread.c
@@ -130,9 +130,14 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 	pInitCtx->elr = (uint64_t)z_thread_entry;
 #endif
 
+#if defined(CONFIG_ZVM) && defined(CONFIG_HAS_ARM_VHE)
+	pInitCtx->spsr = SPSR_MODE_EL2H | DAIF_FIQ_BIT;
+	/* initialize the thread's vcpu_struct */
+	thread->vcpu_struct = NULL;
+#else
 	/* Keep using SP_EL1 */
 	pInitCtx->spsr = SPSR_MODE_EL1H | DAIF_FIQ_BIT;
-
+#endif
 	/* thread birth happens through the exception return path */
 	thread->arch.exception_depth = 1;
diff --git a/arch/arm64/core/zvm/CMakeLists.txt b/arch/arm64/core/zvm/CMakeLists.txt
new file mode 100644
index 00000000000000..945bf228ee3725
--- /dev/null
+++ b/arch/arm64/core/zvm/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+# Copyright 2024-2025 openEuler SIG-Zephyr
+# SPDX-License-Identifier: Apache-2.0
+
+zephyr_library()
+
+zephyr_library_sources(
+	cpu.c
+	mmu.c
+	timer.c
+	switch.c
+	hyp_entry.S
+	hyp_vector.S
+)
diff --git a/arch/arm64/core/zvm/Kconfig b/arch/arm64/core/zvm/Kconfig
new file mode 100644
index 00000000000000..c27a0ec9f5c82d
--- /dev/null
+++ b/arch/arm64/core/zvm/Kconfig
@@ -0,0 +1,32 @@
+# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+# Copyright 2024-2025 openEuler SIG-Zephyr
+# SPDX-License-Identifier: Apache-2.0
+
+if ARM_MMU
+
+config ZVM_MAX_VM_XLAT_TABLES
+	int "Maximum number of VM stage-2 translation tables"
+	default 1024
+	help
+	  This option specifies the maximum number of VM translation tables.
+	  Translation tables are allocated at compile time and used at runtime
+	  as needed. If the runtime need exceeds the preallocated number of
+	  translation tables, an assertion failure results.
+
+endif # ARM_MMU
+
+config VIRT_ARM_ARCH_TIMER
+	bool "Enable the virtual ARM arch timer"
+	default y
+	help
+	  This option enables the ARM arch timer for the virtual machine.
+
+if VIRT_ARM_ARCH_TIMER
+
+config VIRT_ARM_ARCH_TIMER_PRIORITY
+	int "Virtual ARM arch timer priority"
+	default 78
+	help
+	  This option specifies the priority of the ARM arch timer for the
+	  virtual machine.
+
+endif # VIRT_ARM_ARCH_TIMER
diff --git a/arch/arm64/core/zvm/cpu.c b/arch/arm64/core/zvm/cpu.c
new file mode 100644
index 00000000000000..a7365a5275b559
--- /dev/null
+++ b/arch/arm64/core/zvm/cpu.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2021-2022 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+extern uint64_t cpu_vmpidr_el2_list[CONFIG_MP_NUM_CPUS];
+
+/**
+ * @brief Check whether Hyp mode (EL2) and VHE are supported.
+ */
+static bool is_basic_hardware_support(void)
+{
+	if (!is_el_implemented(MODE_EL2)) {
+		ZVM_LOG_ERR("Hyp mode not available on this system.\n");
+		return false;
+	}
+
+	return is_el2_vhe_supported();
+}
+
+static bool is_gicv3_device_support(void)
+{
+#if defined(CONFIG_GIC_V3)
+	return true;
+#else
+	return false;
+#endif
+}
+
+static int vcpu_virq_init(struct z_vcpu *vcpu)
+{
+	struct gicv3_vcpuif_ctxt *ctxt;
+
+	/* init the vgicv3 context */
+	ctxt = (struct gicv3_vcpuif_ctxt *)k_malloc(sizeof(struct gicv3_vcpuif_ctxt));
+	if (!ctxt) {
+		ZVM_LOG_ERR("Init vcpu context failed");
+		return -ENXIO;
+	}
+	memset(ctxt, 0, sizeof(struct gicv3_vcpuif_ctxt));
+
+	vcpu_gicv3_init(ctxt);
+	vcpu->arch->virq_data = ctxt;
+
+	return 0;
+}
+
+static int vcpu_virq_deinit(struct z_vcpu *vcpu)
+{
+	struct gicv3_vcpuif_ctxt *ctxt;
+
+	ctxt = vcpu->arch->virq_data;
+	k_free(ctxt);
+
+	return 0;
+}
+
+static void vcpu_vgic_save(struct z_vcpu *vcpu)
+{
+	vgicv3_state_save(vcpu, (struct gicv3_vcpuif_ctxt *)vcpu->arch->virq_data);
+}
+
+static void vcpu_vgic_load(struct z_vcpu *vcpu)
+{
+	vgicv3_state_load(vcpu, (struct gicv3_vcpuif_ctxt *)vcpu->arch->virq_data);
+}
+
+static void vcpu_vtimer_save(struct z_vcpu *vcpu)
+{
+	uint64_t vcycles, pcycles;
+	k_timeout_t vticks, pticks;
+	struct virt_timer_context *timer_ctxt = vcpu->arch->vtimer_context;
+
+#ifdef CONFIG_HAS_ARM_VHE
+	/* virtual timer save */
+	timer_ctxt->cntv_ctl = read_cntv_ctl_el02();
+	write_cntv_ctl_el02(timer_ctxt->cntv_ctl & ~CNTV_CTL_ENABLE_BIT);
+	timer_ctxt->cntv_cval = read_cntv_cval_el02();
+	/* physical timer save */
+	timer_ctxt->cntp_ctl = read_cntp_ctl_el02();
+	write_cntp_ctl_el02(timer_ctxt->cntp_ctl & ~CNTP_CTL_ENABLE_BIT);
+	timer_ctxt->cntp_cval = read_cntp_cval_el02();
+
+	if (timer_ctxt->cntv_ctl & CNTV_CTL_ENABLE_BIT &&
+	    !(timer_ctxt->cntv_ctl & CNTV_CTL_IMASK_BIT)) {
+		vcycles = read_cntvct_el0();
+		if (timer_ctxt->cntv_cval <= vcycles) {
+			vticks.ticks = 0;
+		} else {
+			vticks.ticks = (timer_ctxt->cntv_cval - vcycles) / HOST_CYC_PER_TICK;
+		}
+		z_add_timeout(&timer_ctxt->vtimer_timeout, timer_ctxt->vtimer_timeout.fn, vticks);
+	}
+	if (timer_ctxt->cntp_ctl & CNTP_CTL_ENABLE_BIT &&
+	    !(timer_ctxt->cntp_ctl & CNTP_CTL_IMASK_BIT)) {
+		pcycles = read_cntpct_el0();
+		if (timer_ctxt->cntp_cval <= pcycles) {
+			pticks.ticks = 0;
+		} else {
+			pticks.ticks = (timer_ctxt->cntp_cval - pcycles) / HOST_CYC_PER_TICK;
+		}
+		z_add_timeout(&timer_ctxt->ptimer_timeout, timer_ctxt->ptimer_timeout.fn, pticks);
+	}
+#else
+	timer_ctxt->cntv_ctl = read_cntv_ctl_el0();
+	write_cntv_ctl_el0(timer_ctxt->cntv_ctl & ~CNTV_CTL_ENABLE_BIT);
+	timer_ctxt->cntv_cval = read_cntv_cval_el0();
+#endif
+	barrier_dsync_fence_full();
+}
+
+static void vcpu_vtimer_load(struct z_vcpu *vcpu)
+{
+	struct virt_timer_context *timer_ctxt = vcpu->arch->vtimer_context;
+
+	z_abort_timeout(&timer_ctxt->vtimer_timeout);
+	z_abort_timeout(&timer_ctxt->ptimer_timeout);
+
+#ifdef CONFIG_HAS_ARM_VHE
+	write_cntvoff_el2(timer_ctxt->timer_offset);
+#else
+	write_cntvoff_el2(timer_ctxt->timer_offset);
+	write_cntv_cval_el0(timer_ctxt->cntv_cval);
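+	/*
+	 * Note the restore order here: the compare value goes in before
+	 * CNTV_CTL, so that re-enabling the timer cannot fire against a
+	 * stale CVAL.
+	 */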
+	write_cntv_ctl_el0(timer_ctxt->cntv_ctl);
+#endif
+	barrier_dsync_fence_full();
+}
+
+static void arch_vcpu_sys_regs_init(struct z_vcpu *vcpu)
+{
+	struct zvm_vcpu_context *aarch64_c = &vcpu->arch->ctxt;
+
+	/* Each vCPU's mpidr_el1 mirrors one physical CPU, numbered from 0 to n. */
+	aarch64_c->sys_regs[VCPU_MPIDR_EL1] = cpu_vmpidr_el2_list[vcpu->vcpu_id];
+
+	aarch64_c->sys_regs[VCPU_CPACR_EL1] = 0x03 << 20;
+	aarch64_c->sys_regs[VCPU_VPIDR] = 0x410fc050;
+
+	aarch64_c->sys_regs[VCPU_TTBR0_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_TTBR1_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_MAIR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_TCR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_PAR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_AMAIR_EL1] = 0;
+
+	aarch64_c->sys_regs[VCPU_TPIDR_EL0] = read_tpidr_el0();
+	aarch64_c->sys_regs[VCPU_TPIDRRO_EL0] = read_tpidrro_el0();
+	aarch64_c->sys_regs[VCPU_CSSELR_EL1] = read_csselr_el1();
+	aarch64_c->sys_regs[VCPU_SCTLR_EL1] = 0x30C50838;
+	aarch64_c->sys_regs[VCPU_ESR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_AFSR0_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_AFSR1_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_FAR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_VBAR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_CONTEXTIDR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_CNTKCTL_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_ELR_EL1] = 0;
+	aarch64_c->sys_regs[VCPU_SPSR_EL1] = SPSR_MODE_EL1H;
+}
+
+static void arch_vcpu_sys_regs_deinit(struct z_vcpu *vcpu)
+{
+	int i;
+	struct zvm_vcpu_context *aarch64_c = &vcpu->arch->ctxt;
+
+	for (i = 0; i < VCPU_SYS_REG_NUM; i++) {
+		aarch64_c->sys_regs[i] = 0;
+	}
+}
+
+static void arch_vcpu_common_regs_init(struct z_vcpu *vcpu)
+{
+	struct zvm_vcpu_context *ctxt;
+
+	ctxt = &vcpu->arch->ctxt;
+	memset(&ctxt->regs, 0, sizeof(struct zvm_vcpu_context));
+
+	ctxt->regs.pc = vcpu->vm->os->info.entry_point;
+	ctxt->regs.pstate = (SPSR_MODE_EL1H | DAIF_DBG_BIT | DAIF_ABT_BIT |
+			     DAIF_IRQ_BIT | DAIF_FIQ_BIT);
+}
+
+static void arch_vcpu_common_regs_deinit(struct z_vcpu *vcpu)
+{
+	ARG_UNUSED(vcpu);
+}
+
+static void arch_vcpu_fp_regs_init(struct z_vcpu *vcpu)
+{
+	ARG_UNUSED(vcpu);
+}
+
+static void arch_vcpu_fp_regs_deinit(struct z_vcpu *vcpu)
+{
+	ARG_UNUSED(vcpu);
+}
+
+uint64_t *find_index_reg(uint16_t index, arch_commom_regs_t *regs)
+{
+	uint64_t *value;
+
+	if (index == 31) {
+		value = NULL;
+	} else if (index > 18 && index < 30) {
+		value = &regs->callee_saved_regs.x19 + index - 19;
+	} else {
+		value = (index == 30) ?
+			&regs->esf_handle_regs.lr : (&regs->esf_handle_regs.x0 + index);
+	}
+	return value;
+}
+
+void vcpu_sysreg_load(struct z_vcpu *vcpu)
+{
+	struct zvm_vcpu_context *g_context = &vcpu->arch->ctxt;
+
+	write_csselr_el1(g_context->sys_regs[VCPU_CSSELR_EL1]);
+	write_vmpidr_el2(g_context->sys_regs[VCPU_MPIDR_EL1]);
+	write_sctlr_el12(g_context->sys_regs[VCPU_SCTLR_EL1]);
+	write_tcr_el12(g_context->sys_regs[VCPU_TCR_EL1]);
+	write_cpacr_el12(g_context->sys_regs[VCPU_CPACR_EL1]);
+	write_ttbr0_el12(g_context->sys_regs[VCPU_TTBR0_EL1]);
+	write_ttbr1_el12(g_context->sys_regs[VCPU_TTBR1_EL1]);
+	write_esr_el12(g_context->sys_regs[VCPU_ESR_EL1]);
+	write_afsr0_el12(g_context->sys_regs[VCPU_AFSR0_EL1]);
+	write_afsr1_el12(g_context->sys_regs[VCPU_AFSR1_EL1]);
+	write_far_el12(g_context->sys_regs[VCPU_FAR_EL1]);
+	write_mair_el12(g_context->sys_regs[VCPU_MAIR_EL1]);
+	write_vbar_el12(g_context->sys_regs[VCPU_VBAR_EL1]);
+	write_contextidr_el12(g_context->sys_regs[VCPU_CONTEXTIDR_EL1]);
+	write_amair_el12(g_context->sys_regs[VCPU_AMAIR_EL1]);
+	write_cntkctl_el12(g_context->sys_regs[VCPU_CNTKCTL_EL1]);
+	write_par_el1(g_context->sys_regs[VCPU_PAR_EL1]);
+	write_tpidr_el1(g_context->sys_regs[VCPU_TPIDR_EL1]);
+	write_sp_el1(g_context->sys_regs[VCPU_SP_EL1]);
+	write_elr_el12(g_context->sys_regs[VCPU_ELR_EL1]);
+	write_spsr_el12(g_context->sys_regs[VCPU_SPSR_EL1]);
+
+	vcpu->arch->vcpu_sys_register_loaded = true;
+	write_hstr_el2(BIT(15));
+	vcpu->arch->host_mdcr_el2 = read_mdcr_el2();
+	write_mdcr_el2(vcpu->arch->guest_mdcr_el2);
+}
+
+void vcpu_sysreg_save(struct z_vcpu *vcpu)
+{
+	struct zvm_vcpu_context *g_context = &vcpu->arch->ctxt;
+
+	g_context->sys_regs[VCPU_MPIDR_EL1] = read_vmpidr_el2();
+	g_context->sys_regs[VCPU_CSSELR_EL1] = read_csselr_el1();
+	g_context->sys_regs[VCPU_ACTLR_EL1] = read_actlr_el1();
+
+	g_context->sys_regs[VCPU_SCTLR_EL1] = read_sctlr_el12();
+	g_context->sys_regs[VCPU_CPACR_EL1] = read_cpacr_el12();
+	g_context->sys_regs[VCPU_TTBR0_EL1] = read_ttbr0_el12();
+	g_context->sys_regs[VCPU_TTBR1_EL1] = read_ttbr1_el12();
+	g_context->sys_regs[VCPU_ESR_EL1] = read_esr_el12();
+	g_context->sys_regs[VCPU_TCR_EL1] = read_tcr_el12();
+	g_context->sys_regs[VCPU_AFSR0_EL1] = read_afsr0_el12();
+	g_context->sys_regs[VCPU_AFSR1_EL1] = read_afsr1_el12();
+	g_context->sys_regs[VCPU_FAR_EL1] = read_far_el12();
+	g_context->sys_regs[VCPU_MAIR_EL1] = read_mair_el12();
+	g_context->sys_regs[VCPU_VBAR_EL1] = read_vbar_el12();
+	g_context->sys_regs[VCPU_CONTEXTIDR_EL1] = read_contextidr_el12();
+	g_context->sys_regs[VCPU_AMAIR_EL1] = read_amair_el12();
+	g_context->sys_regs[VCPU_CNTKCTL_EL1] = read_cntkctl_el12();
+
+	g_context->sys_regs[VCPU_PAR_EL1] = read_par_el1();
+	g_context->sys_regs[VCPU_TPIDR_EL1] = read_tpidr_el1();
+	g_context->regs.esf_handle_regs.elr = read_elr_el12();
+	g_context->regs.esf_handle_regs.spsr = read_spsr_el12();
+	vcpu->arch->vcpu_sys_register_loaded = false;
+}
+
+void switch_to_guest_sysreg(struct z_vcpu *vcpu)
+{
+	uint32_t reg_val;
+	struct zvm_vcpu_context *gcontext = &vcpu->arch->ctxt;
+	struct zvm_vcpu_context *hcontext = &vcpu->arch->host_ctxt;
+
+	/* save host context */
+	hcontext->running_vcpu = vcpu;
+	hcontext->sys_regs[VCPU_SPSR_EL1] = read_spsr_el1();
+	hcontext->sys_regs[VCPU_MDSCR_EL1] = read_mdscr_el1();
+
+	/* load the stage-2 pgd for the vm */
+	write_vtcr_el2(vcpu->vm->arch->vtcr_el2);
+	write_vttbr_el2(vcpu->vm->arch->vttbr);
+	barrier_isync_fence_full();
+
+	/* enable hypervisor traps */
+	write_hcr_el2(vcpu->arch->hcr_el2);
+	reg_val = read_cpacr_el1();
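+	/*
+	 * CPACR_EL1 trap setup for the guest (accessed via CPTR_EL2 under
+	 * VHE): trap trace accesses (TTA) and SVE (ZEN cleared), keep AMU
+	 * trapping (CPTR_EL2_TAM), but leave FP/SIMD untrapped (FPEN_NOTRAP).
+	 */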
+	reg_val |= CPACR_EL1_TTA;
+	reg_val &= ~CPACR_EL1_ZEN;
+	reg_val |= CPTR_EL2_TAM;
+	reg_val |= CPACR_EL1_FPEN_NOTRAP;
+	write_cpacr_el1(reg_val);
+	write_vbar_el2((uint64_t)_hyp_vector_table);
+
+	hcontext->sys_regs[VCPU_TPIDRRO_EL0] = read_tpidrro_el0();
+	write_tpidrro_el0(gcontext->sys_regs[VCPU_TPIDRRO_EL0]);
+	write_elr_el2(gcontext->regs.pc);
+	write_spsr_el2(gcontext->regs.pstate);
+	reg_val = ((struct gicv3_vcpuif_ctxt *)vcpu->arch->virq_data)->icc_ctlr_el1;
+	reg_val &= ~(0x02);
+	write_sysreg(reg_val, ICC_CTLR_EL1);
+}
+
+void switch_to_host_sysreg(struct z_vcpu *vcpu)
+{
+	uint32_t reg_val;
+	struct zvm_vcpu_context *gcontext = &vcpu->arch->ctxt;
+	struct zvm_vcpu_context *hcontext = &vcpu->arch->host_ctxt;
+
+	gcontext->sys_regs[VCPU_TPIDRRO_EL0] = read_tpidrro_el0();
+	write_tpidrro_el0(hcontext->sys_regs[VCPU_TPIDRRO_EL0]);
+	gcontext->regs.pc = read_elr_el2();
+	gcontext->regs.pstate = read_spsr_el2();
+	reg_val = ((struct gicv3_vcpuif_ctxt *)vcpu->arch->virq_data)->icc_ctlr_el1;
+	reg_val |= (0x02);
+	write_sysreg(reg_val, ICC_CTLR_EL1);
+
+	/* disable hypervisor traps */
+	if (vcpu->arch->hcr_el2 & HCR_VSE_BIT) {
+		vcpu->arch->hcr_el2 = read_hcr_el2();
+	}
+	write_hcr_el2(HCR_VHE_FLAGS);
+	write_vbar_el2((uint64_t)_vector_table);
+
+	/* save the vm's stage-2 pgd */
+	vcpu->vm->arch->vtcr_el2 = read_vtcr_el2();
+	vcpu->vm->arch->vttbr = read_vttbr_el2();
+	barrier_isync_fence_full();
+
+	/* load host context */
+	write_mdscr_el1(hcontext->sys_regs[VCPU_MDSCR_EL1]);
+	write_spsr_el1(hcontext->sys_regs[VCPU_SPSR_EL1]);
+}
+
+void arch_vcpu_context_save(struct z_vcpu *vcpu)
+{
+	vcpu_vgic_save(vcpu);
+	vcpu_vtimer_save(vcpu);
+	vcpu_sysreg_save(vcpu);
+}
+
+void arch_vcpu_context_load(struct z_vcpu *vcpu)
+{
+	int cpu = _current_cpu->id;
+
+	vcpu->cpu = cpu;
+
+	vcpu_sysreg_load(vcpu);
+	vcpu_vtimer_load(vcpu);
+	vcpu_vgic_load(vcpu);
+
+	vcpu->arch->hcr_el2 &= ~HCR_TWE_BIT;
+	vcpu->arch->hcr_el2 &= ~HCR_TWI_BIT;
+}
+
+int arch_vcpu_init(struct z_vcpu *vcpu)
+{
+	int ret = 0;
+	struct vcpu_arch *vcpu_arch = vcpu->arch;
+	struct vm_arch *vm_arch = vcpu->vm->arch;
+
+	vcpu_arch->hcr_el2 = HCR_VM_FLAGS;
+	vcpu_arch->guest_mdcr_el2 = 0;
+	vcpu_arch->host_mdcr_el2 = 0;
+	vcpu_arch->list_regs_map = 0;
+	vcpu_arch->pause = 0;
+	vcpu_arch->vcpu_sys_register_loaded = false;
+
+	/* init vm_arch here */
+	vm_arch->vtcr_el2 = (0x20 | BIT(6) | BIT(8) | BIT(10) | BIT(12) | BIT(13) | BIT(31));
+	vm_arch->vttbr = (vcpu->vm->vmid | vm_arch->vm_pgd_base);
+
+	arch_vcpu_common_regs_init(vcpu);
+	arch_vcpu_sys_regs_init(vcpu);
+	arch_vcpu_fp_regs_init(vcpu);
+
+	ret = vcpu_virq_init(vcpu);
+	if (ret) {
+		return ret;
+	}
+
+	ret = arch_vcpu_timer_init(vcpu);
+	if (ret) {
+		return ret;
+	}
+
+#ifdef CONFIG_VM_DTB_FILE_INPUT
+	/* Pass boot arguments to Linux, e.g. the FDT address */
+	vcpu_arch->ctxt.regs.esf_handle_regs.x0 = LINUX_DTB_MEM_BASE;
+	vcpu_arch->ctxt.regs.esf_handle_regs.x1 = 0;
+	vcpu_arch->ctxt.regs.esf_handle_regs.x2 = 0;
+	vcpu_arch->ctxt.regs.esf_handle_regs.x3 = 0;
+	vcpu_arch->ctxt.regs.callee_saved_regs.x20 = LINUX_DTB_MEM_BASE;
+	vcpu_arch->ctxt.regs.callee_saved_regs.x21 = 0;
+	vcpu_arch->ctxt.regs.callee_saved_regs.x22 = 0;
+	vcpu_arch->ctxt.regs.callee_saved_regs.x23 = 0;
+#endif
+	return ret;
+}
+
+int arch_vcpu_deinit(struct z_vcpu *vcpu)
+{
+	int ret = 0;
+
+	ret = arch_vcpu_timer_deinit(vcpu);
+	if (ret) {
+		ZVM_LOG_WARN("Deinit arch timer failed.\n");
+		return ret;
+	}
+
+	ret = vcpu_virq_deinit(vcpu);
+	if (ret) {
+		ZVM_LOG_WARN("Deinit virt cpu irq failed.\n");
+		return ret;
+	}
+
+	arch_vcpu_fp_regs_deinit(vcpu);
+	arch_vcpu_sys_regs_deinit(vcpu);
+	arch_vcpu_common_regs_deinit(vcpu);
+
+	return ret;
+}
+
+int zvm_arch_init(void *op)
+{
+	ARG_UNUSED(op);
+	int ret = 0;
+
+	/* Are Hyp mode and VHE available? */
+	if (!is_basic_hardware_support()) {
+		return -ESRCH;
+	}
+	if (!is_gicv3_device_support()) {
+		return -ENODEV;
+	}
+	return ret;
+}
diff --git a/arch/arm64/core/zvm/hyp_entry.S b/arch/arm64/core/zvm/hyp_entry.S
new file mode 100644
index 00000000000000..c93e1e2b439eb2
--- /dev/null
+++ b/arch/arm64/core/zvm/hyp_entry.S
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+
+#include "../include/zvm_offsets_short_arch.h"
+#include "../core/macro_priv.inc"
+
+_ASM_FILE_PROLOGUE
+
+.macro save_registers_context base
+	stp x0, x1, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x0_x1]
+	stp x2, x3, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x2_x3]
+	stp x4, x5, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x4_x5]
+	stp x6, x7, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x6_x7]
+	stp x8, x9, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x8_x9]
+	stp x10, x11, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x10_x11]
+	stp x12, x13, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x12_x13]
+	stp x14, x15, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x14_x15]
+	stp x16, x17, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x16_x17]
+	stp x18, lr, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x18_lr]
+
+	stp x19, x20, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x19_20]
+	stp x21, x22, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x21_x22]
+	stp x23, x24, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x23_x24]
+	stp x25, x26, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x25_x26]
+	stp x27, x28, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x27_x28]
+
+	mrs x4, sp_el0
+	stp x29, x4, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x29_sp_el0]
+	mrs x4, sp_el1
+	str x4, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_sp_elx]
+.endm
+
+.macro load_registers_context base
+	ldp x0, x1, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x0_x1]
+	ldp x2, x3, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x2_x3]
+	ldp x4, x5, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x4_x5]
+	ldp x6, x7, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x6_x7]
+	ldp x8, x9, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x8_x9]
+	ldp x10, x11, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x10_x11]
+	ldp x12, x13, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x12_x13]
+	ldp x14, x15, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x14_x15]
+	ldp x16, x17, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x16_x17]
+	ldp x18, lr, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x18_lr]
+
+	ldp x19, x20, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x19_20]
+	ldp x21, x22, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x21_x22]
+	ldp x23, x24, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x23_x24]
+	ldp x25, x26, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x25_x26]
+	ldp x27, x28, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x27_x28]
+
+	ldr x4, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_sp_elx]
+	msr sp_el1, x4
+	ldr x4, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x29_sp_el0 + 0x08]
+	msr sp_el0, x4
+	ldr x4, [\base, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x4_x5]
+	ldr x29, [\base, #_zvm_vcpu_ctxt_arch_regs_to_callee_saved_x29_sp_el0]
+.endm
+
+/**
+ * @brief VM entry point: switch from the host to the VM context.
+ * x0: vcpu struct.
+ * x1: host CPU context.
+ */
+GTEXT(guest_vm_entry)
+SECTION_SUBSEC_FUNC(TEXT, __hyp_section, guest_vm_entry)
+
+	stp x16, x17, [x1, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x16_x17]
+	/* If an interrupt is already pending, return instead of entering the guest. */
+	mrs x16, isr_el1
+	cbz x16, no_host_isr
+	mov x0, #ARM_VM_EXCEPTION_IRQ
+	ldp x16, x17, [x1, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x16_x17]
+	ret
+
+no_host_isr:
+	/* save hyp context */
+	save_registers_context x1
+
+	/* load guest context */
+	add x29, x0, #_vcpu_arch_to_ctxt
+	ldr x29, [x29]
+	load_registers_context x29
+	isb
+	eret
+
+
+/**
+ * @brief VM exit point: switch back to the hypervisor context.
+ * x0: vcpu struct.
+ * x1: the exception type.
+ */
+GTEXT(guest_vm_exit)
+SECTION_SUBSEC_FUNC(TEXT, __hyp_section, guest_vm_exit)
+	add x0, x0, #_vcpu_arch_to_ctxt
+	ldr x0, [x0]
+
+	ldp x18, lr, [sp], #16
+
+	/* store guest context */
+	save_registers_context x0
+	ldp x4, x5, [sp], #16
+	stp x4, x5, [x0, #_zvm_vcpu_ctxt_arch_regs_to_esf_t_x0_x1]
+
+	/* IRQ exit? Skip the pending-IRQ check. */
+	cmp x1, #ARM_VM_EXCEPTION_IRQ
+	b.eq vm_isr_in_sync
+	/* An IRQ may be pending while a sync exception is handled: record it. */
+	mrs x0, isr_el1
+	cbz x0, vm_isr_in_sync
+	mov x1, #ARM_VM_EXCEPTION_IRQ_IN_SYNC
+vm_isr_in_sync:
+	/* Save the exception type for later use. */
+	mov x0, x1
+	stp x0, x1, [sp, #-16]!
+
+	bl get_zvm_host_context
+	mov x1, x0
+
+	/* load host context */
+	load_registers_context x1
+	isb
+	ldp x0, x1, [sp], #16
+	ret
diff --git a/arch/arm64/core/zvm/hyp_vector.S b/arch/arm64/core/zvm/hyp_vector.S
new file mode 100644
index 00000000000000..d0d55c641951c8
--- /dev/null
+++ b/arch/arm64/core/zvm/hyp_vector.S
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+_ASM_FILE_PROLOGUE
+
+/*
+ * +------------------+------------------+-------------------------+
+ * | Address          | Exception type   | Description             |
+ * +------------------+------------------+-------------------------+
+ * | VBAR_ELn + 0x000 | Synchronous      | Current EL with SP0     |
+ * |          + 0x080 | IRQ / vIRQ       |                         |
+ * |          + 0x100 | FIQ / vFIQ       |                         |
+ * |          + 0x180 | SError / vSError |                         |
+ * +------------------+------------------+-------------------------+
+ * |          + 0x200 | Synchronous      | Current EL with SPx     |
+ * |          + 0x280 | IRQ / vIRQ       |                         |
+ * |          + 0x300 | FIQ / vFIQ       |                         |
+ * |          + 0x380 | SError / vSError |                         |
+ * +------------------+------------------+-------------------------+
+ * |          + 0x400 | Synchronous      | Lower EL using AArch64  |
+ * |          + 0x480 | IRQ / vIRQ       |                         |
+ * |          + 0x500 | FIQ / vFIQ       |                         |
+ * |          + 0x580 | SError / vSError |                         |
+ * +------------------+------------------+-------------------------+
+ * |          + 0x600 | Synchronous      | Lower EL using AArch32  |
+ * |          + 0x680 | IRQ / vIRQ       |                         |
+ * |          + 0x700 | FIQ / vFIQ       |                         |
+ * |          + 0x780 | SError / vSError |                         |
+ * +------------------+------------------+-------------------------+
+ */
+
+GDATA(_hyp_vector_table)
+SECTION_SUBSEC_FUNC(exc_vector_table,_vector_table_section,_hyp_vector_table)
+	/* The whole table must be 2K aligned */
+	.align 11
+
+	/* Current EL with SP0 / Synchronous */
+	.align 7
+	b	.
+	/* Current EL with SP0 / IRQ */
+	.align 7
+	b	.
+	/* Current EL with SP0 / FIQ */
+	.align 7
+	b	.
+	/* Current EL with SP0 / SError */
+	.align 7
+	b	.
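+	/*
+	 * The unused slots deliberately spin in place ("b ."); only the
+	 * current-EL (SPx) and lower-EL AArch64 Synchronous/IRQ vectors
+	 * below are wired to handlers.
+	 */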
+
+	/* Current EL with SPx / Synchronous */
+	.align 7
+	b	z_arm64_cur_sync_exc
+	/* Current EL with SPx / IRQ */
+	.align 7
+	b	z_arm64_cur_irq_exc
+	/* Current EL with SPx / FIQ */
+	.align 7
+	b	.
+	/* Current EL with SPx / SError */
+	.align 7
+	b	.
+
+	/* Lower EL using AArch64 / Synchronous */
+	.align 7
+	b	z_arm64_hyp_sync_exc
+	/* Lower EL using AArch64 / IRQ */
+	.align 7
+	b	z_arm64_hyp_irq_exc
+	/* Lower EL using AArch64 / FIQ */
+	.align 7
+	b	.
+	/* Lower EL using AArch64 / SError */
+	.align 7
+	b	.
+
+	/* Lower EL using AArch32 / Synchronous */
+	.align 7
+	b	.
+	/* Lower EL using AArch32 / IRQ */
+	.align 7
+	b	.
+	/* Lower EL using AArch32 / FIQ */
+	.align 7
+	b	.
+	/* Lower EL using AArch32 / SError */
+	.align 7
+	b	.
+
+
+GTEXT(z_arm64_cur_sync_exc)
+SECTION_FUNC(TEXT, z_arm64_cur_sync_exc)
+
+	/* @TODO Error handling */
+	b	.
+
+GTEXT(z_arm64_cur_irq_exc)
+SECTION_FUNC(TEXT, z_arm64_cur_irq_exc)
+	stp x0, x1, [sp, #-16]!
+	stp x18, lr, [sp, #-16]!
+
+	mov x0, sp
+	bl z_vm_lower_irq_handler
+
+	mov x1, #ARM_VM_EXCEPTION_IRQ
+	b guest_vm_exit
+
+
+GTEXT(z_arm64_hyp_sync_exc)
+SECTION_FUNC(TEXT, z_arm64_hyp_sync_exc)
+	/* Use x0, x1, x18 and lr to stash the values we still need. */
+	stp x0, x1, [sp, #-16]!
+	stp x18, lr, [sp, #-16]!
+
+	mrs x0, esr_el2
+	bl z_vm_lower_sync_handler
+
+	mov x1, #ARM_VM_EXCEPTION_SYNC
+	b guest_vm_exit
+
+GTEXT(z_arm64_hyp_irq_exc)
+SECTION_FUNC(TEXT, z_arm64_hyp_irq_exc)
+	stp x0, x1, [sp, #-16]!
+	stp x18, lr, [sp, #-16]!
+
+	mov x0, sp
+	bl z_vm_lower_irq_handler
+
+	mov x1, #ARM_VM_EXCEPTION_IRQ
+	b guest_vm_exit
diff --git a/arch/arm64/core/zvm/mmu.c b/arch/arm64/core/zvm/mmu.c
new file mode 100644
index 00000000000000..33ff8f5c02c3e7
--- /dev/null
+++ b/arch/arm64/core/zvm/mmu.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../core/mmu.h"
+#include
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+static uint64_t vm_xlat_tables[CONFIG_MAX_VM_NUM][CONFIG_ZVM_MAX_VM_XLAT_TABLES * Ln_XLAT_NUM_ENTRIES]
+	__aligned(Ln_XLAT_NUM_ENTRIES * sizeof(uint64_t));
+static int vm_xlat_use_count[CONFIG_MAX_VM_NUM][CONFIG_ZVM_MAX_VM_XLAT_TABLES];
+static struct k_spinlock vm_xlat_lock;
+
+/**
+ * @brief Build the stage-2 descriptor attributes for a VM memory region.
+ */
+static uint64_t get_vm_region_desc(uint32_t attrs)
+{
+	unsigned int mem_type;
+	uint64_t desc = 0U;
+
+	/*
+	 * AP bits for EL0/EL1 RW permission on stage-2:
+	 *
+	 * AP[2:1]   EL0/EL1
+	 * +--------------------+
+	 *   00      none
+	 *   01      RO
+	 *   10      WO
+	 *   11      RW
+	 */
+
+	/* AP_R bits for data access permission */
+	desc |= (attrs & MT_S2_R) ? S2_PTE_BLOCK_DESC_AP_RO : S2_PTE_BLOCK_DESC_AP_NO_RW;
+
+	/* AP_W bits for data access permission */
+	desc |= (attrs & MT_S2_W) ? S2_PTE_BLOCK_DESC_AP_WO : S2_PTE_BLOCK_DESC_AP_NO_RW;
+
+	/* The access flag */
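+	/*
+	 * Stage-2 Access Flag: mapping with MT_S2_ACCESS_OFF leaves AF
+	 * clear, so the guest's first access faults; such access faults
+	 * appear to be what the handle_faccess_desc() path in switch.c
+	 * services for trap-and-emulate.
+	 */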
+	desc |= (attrs & MT_S2_ACCESS_OFF) ? 0 : S2_PTE_BLOCK_DESC_AF;
+
+	mem_type = MT_S2_TYPE(attrs);
+
+	switch (mem_type) {
+	case MT_S2_DEVICE_nGnRnE:
+	case MT_S2_DEVICE_nGnRE:
+	case MT_S2_DEVICE_GRE:
+		desc |= S2_PTE_BLOCK_DESC_OUTER_SHARE;
+		/* Map device memory as execute-never */
+		desc |= S2_PTE_BLOCK_DESC_PU_XN;
+		break;
+	case MT_S2_NORMAL_WT:
+	case MT_S2_NORMAL_NC:
+	case MT_S2_NORMAL:
+		/* Map normal read/write memory as executable */
+		if (attrs & (MT_S2_R | MT_S2_W)) {
+			desc |= S2_PTE_BLOCK_DESC_NO_XN;
+		}
+
+		if (mem_type == MT_NORMAL) {
+			desc |= S2_PTE_BLOCK_DESC_INNER_SHARE;
+		} else {
+			desc |= S2_PTE_BLOCK_DESC_OUTER_SHARE;
+		}
+		/*
+		 * When VM threads use atomic operations, the stage-2
+		 * attributes must be Normal memory, Outer Write-Back
+		 * Cacheable & Inner Write-Back Cacheable.
+		 */
+		desc |= (S2_PTE_BLOCK_DESC_O_WB_CACHE | S2_PTE_BLOCK_DESC_I_WB_CACHE);
+		break;
+	}
+
+	return desc;
+}
+
+/**
+ * @brief Pre-check a virtual-to-physical mapping for a VM.
+ */
+static void arch_vm_mmap_pre(uintptr_t virt_addr, uintptr_t phys_addr, size_t size, uint32_t flags)
+{
+	uintptr_t aligned_phys, addr_offset;
+	size_t aligned_size, align_boundary;
+	k_spinlock_key_t key;
+	ARG_UNUSED(key);
+	uint8_t *dest_addr;
+
+	/* get the aligned address of this page */
+	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
+					 phys_addr, size, CONFIG_MMU_PAGE_SIZE);
+	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
+	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
+		 "wraparound for physical address 0x%lx (size %zu)",
+		 aligned_phys, aligned_size);
+
+	align_boundary = CONFIG_MMU_PAGE_SIZE;
+
+	/* Obtain an appropriately sized chunk of virtual memory */
+	dest_addr = (uint8_t *)virt_addr;
+
+	/* If this fails there's something amiss with virt_region_get */
+	__ASSERT((uintptr_t)dest_addr < ((uintptr_t)dest_addr + (size - 1)),
+		 "wraparound for virtual address %p (size %zu)",
+		 dest_addr, size);
+
+	return;
+}
+
+static uint64_t *vm_new_table(uint32_t vmid)
+{
+	unsigned int i;
+
+	/* Look for a free table. */
+	for (i = 0U; i < CONFIG_ZVM_MAX_VM_XLAT_TABLES; i++) {
+		if (vm_xlat_use_count[vmid][i] == 0U) {
+			vm_xlat_use_count[vmid][i] = 1U;
+			/* each table is assigned 512 entries */
+			return &vm_xlat_tables[vmid][i * Ln_XLAT_NUM_ENTRIES];
+		}
+	}
+
+	return NULL;
+}
+
+static inline bool vm_is_desc_block_aligned(uint64_t desc, unsigned int level_size)
+{
+	uint64_t mask = GENMASK(47, PAGE_SIZE_SHIFT);
+	bool aligned = !((desc & mask) & (level_size - 1));
+
+	return aligned;
+}
+
+static inline bool vm_is_desc_superset(uint64_t desc1, uint64_t desc2, unsigned int level)
+{
+	uint64_t mask = DESC_ATTRS_MASK | GENMASK(47, LEVEL_TO_VA_SIZE_SHIFT(level));
+
+	return (desc1 & mask) == (desc2 & mask);
+}
+
+static inline bool vm_is_free_desc(uint64_t desc)
+{
+	return (desc & PTE_DESC_TYPE_MASK) == PTE_INVALID_DESC;
+}
+
+static inline uint64_t *vm_pte_desc_table(uint64_t desc)
+{
+	uint64_t address = desc & GENMASK(47, PAGE_SIZE_SHIFT);
+
+	return (uint64_t *)address;
+}
+
+static inline bool vm_is_table_desc(uint64_t desc, unsigned int level)
+{
+	return level != XLAT_LAST_LEVEL && (desc & PTE_DESC_TYPE_MASK) == PTE_TABLE_DESC;
+}
+
+static inline bool vm_is_block_desc(uint64_t desc)
+{
+	return (desc & PTE_DESC_TYPE_MASK) == PTE_BLOCK_DESC;
+}
+
+static void vm_set_pte_block_desc(uint64_t *pte, uint64_t desc, unsigned int level)
+{
+	if (desc) {
+		desc |= (level == XLAT_LAST_LEVEL) ?
PTE_PAGE_DESC : PTE_BLOCK_DESC; + } + *pte = desc; +} + +static void vm_set_pte_table_desc(uint64_t *pte, uint64_t *table, unsigned int level) +{ + /* Point pte to new table */ + *pte = PTE_TABLE_DESC | (uint64_t)table; +} + +static inline unsigned int vm_table_index(uint64_t *pte, uint32_t vmid) +{ + unsigned int i ; + + i = (pte - &vm_xlat_tables[vmid][0]) / Ln_XLAT_NUM_ENTRIES; + __ASSERT(i < CONFIG_ZVM_MAX_VM_XLAT_TABLES, "table %p out of range", pte); + + return i; +} + +/* Makes a table free for reuse. */ +static void vm_free_table(uint64_t *table, uint32_t vmid) +{ + unsigned int i = vm_table_index(table, vmid); + + __ASSERT(vm_xlat_use_count[vmid][i] == 1U, "table still in use"); + vm_xlat_use_count[vmid][i] = 0U; +} + +/* Adjusts usage count and returns current count. */ +static int vm_table_usage(uint64_t *table, int adjustment, uint32_t vmid) +{ + unsigned int i,table_use; + i = vm_table_index(table, vmid); + + vm_xlat_use_count[vmid][i] += adjustment; + table_use = vm_xlat_use_count[vmid][i]; + __ASSERT(vm_xlat_use_count[vmid][i] > 0, "usage count underflow"); + + return table_use; +} + +static inline void vm_dec_table_ref(uint64_t *table, uint32_t vmid) +{ + int ref_unit = 0xFFFFFFFF; + + vm_table_usage(table, -ref_unit, vmid); +} + +static inline bool vm_is_table_unused(uint64_t *table, uint32_t vmid) +{ + return vm_table_usage(table, 0, vmid) == 1; +} + +static uint64_t *vm_expand_to_table(uint64_t *pte, unsigned int level, uint32_t vmid) +{ + uint64_t *table; + + if(level >= XLAT_LAST_LEVEL) { + __ASSERT(level < XLAT_LAST_LEVEL, "can't expand last level"); + } + + table = vm_new_table(vmid); + + if (!table) { + return NULL; + } + + if (!vm_is_free_desc(*pte)) { + /* + * If entry at current level was already populated + * then we need to reflect that in the new table. + */ + uint64_t desc = *pte; + unsigned int i, stride_shift; + + __ASSERT(vm_is_block_desc(desc), ""); + + if (level + 1 == XLAT_LAST_LEVEL) { + desc |= PTE_PAGE_DESC; + } + + stride_shift = LEVEL_TO_VA_SIZE_SHIFT(level + 1); + for (i = 0U; i < Ln_XLAT_NUM_ENTRIES; i++) { + table[i] = desc | (i << stride_shift); + } + vm_table_usage(table, Ln_XLAT_NUM_ENTRIES, vmid); + } else { + /* + * Adjust usage count for parent table's entry + * that will no longer be free. 
+ */ + vm_table_usage(pte, 1, vmid); + } + + /* Link the new table in place of the pte it replaces */ + vm_set_pte_table_desc(pte, table, level); + + return table; +} + +static int vm_set_mapping(struct arm_mmu_ptables *ptables,\ + uintptr_t virt, size_t size,\ + uint64_t desc, bool may_overwrite, uint32_t vmid) +{ + uint64_t *pte, *ptes[XLAT_LAST_LEVEL + 1]; + uint64_t level_size; + uint64_t *table = ptables->base_xlat_table; + unsigned int level = BASE_XLAT_LEVEL; + int ret = 0; + + while (size) { + __ASSERT(level <= XLAT_LAST_LEVEL, + "max translation table level exceeded\n"); + + /* Locate PTE for given virtual address and page table level */ + pte = &table[XLAT_TABLE_VA_IDX(virt, level)]; + ptes[level] = pte; + + if (vm_is_table_desc(*pte, level)) { + /* Move to the next translation table level */ + level++; + table = vm_pte_desc_table(*pte); + continue; + } + + if (!may_overwrite && !vm_is_free_desc(*pte)) { + /* the entry is already allocated */ + ret = -EBUSY; + break; + } + + level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level); + + if (vm_is_desc_superset(*pte, desc, level)) { + /* This block already covers our range */ + level_size -= (virt & (level_size - 1)); + if (level_size > size) { + level_size = size; + } + goto move_on; + } + + if ((size < level_size) || (virt & (level_size - 1)) || + !vm_is_desc_block_aligned(desc, level_size)) { + /* Range doesn't fit, create subtable */ + table = vm_expand_to_table(pte, level, vmid); + if (!table) { + ret = -ENOMEM; + break; + } + level++; + continue; + } + + /* Adjust usage count for corresponding table */ + if (vm_is_free_desc(*pte)) { + vm_table_usage(pte, 1, vmid); + } + if (!desc) { + vm_table_usage(pte, -1, vmid); + } + /* Create (or erase) block/page descriptor */ + vm_set_pte_block_desc(pte, desc, level); + + /* recursively free unused tables if any */ + while (level != BASE_XLAT_LEVEL && + vm_is_table_unused(pte, vmid)) { + vm_free_table(pte, vmid); + pte = ptes[--level]; + vm_set_pte_block_desc(pte, 0, level); + vm_table_usage(pte, -1, vmid); + } + +move_on: + virt += level_size; + desc += desc ? level_size : 0; + size -= level_size; + + /* Range is mapped, start again for next range */ + table = ptables->base_xlat_table; + level = BASE_XLAT_LEVEL; + + } + + return ret; +} + +static void vm_del_mapping(uint64_t *table, uintptr_t virt, size_t size, + unsigned int level, uint32_t vmid) +{ + size_t step, level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level); + uint64_t *pte, *subtable; + + for ( ; size; virt += step, size -= step) { + step = level_size - (virt & (level_size - 1)); + if (step > size) { + step = size; + } + pte = &table[XLAT_TABLE_VA_IDX(virt, level)]; + + if (vm_is_free_desc(*pte)) { + continue; + } + + if (step != level_size && vm_is_block_desc(*pte)) { + /* need to split this block mapping */ + vm_expand_to_table(pte, level, vmid); + } + + if (vm_is_table_desc(*pte, level)) { + subtable = vm_pte_desc_table(*pte); + vm_del_mapping(subtable, virt, step, level + 1, vmid); + if (!vm_is_table_unused(subtable, vmid)) { + continue; + } + vm_dec_table_ref(subtable, vmid); + } + + /* free this entry */ + *pte = 0; + vm_table_usage(pte, -1, vmid); + } +} + +/** + * @brief un_map the vm's page table entry. 
+ */ +static int vm_remove_dev_map(struct arm_mmu_ptables *ptables, const char *name, + uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs, uint32_t vmid) +{ + int ret = 0; + k_spinlock_key_t key; + ARG_UNUSED(attrs); + + __ASSERT(((virt | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0, + "address/size are not page aligned\n"); + + key = k_spin_lock(&vm_xlat_lock); + ret = vm_set_mapping(ptables, virt, size, 0, true, vmid); + k_spin_unlock(&vm_xlat_lock, key); + return ret; +} + +static int vm_add_dev_map(struct arm_mmu_ptables *ptables, const char *name, + uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs, uint32_t vmid) +{ + int ret; + uint64_t desc; + bool may_overwrite; + k_spinlock_key_t key; + + /*TODO: Need a stage-2 attribution set*/ + may_overwrite = false; + desc = phys; + + __ASSERT(((virt | phys | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0, + "address/size are not page aligned\n"); + key = k_spin_lock(&vm_xlat_lock); + + ret = vm_set_mapping(ptables, virt, size, desc, may_overwrite, vmid); + k_spin_unlock(&vm_xlat_lock, key); + return ret; +} + +static int vm_add_map(struct arm_mmu_ptables *ptables, const char *name, + uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs, uint32_t vmid) +{ + bool may_overwrite = !(attrs & MT_NO_OVERWRITE); + uint64_t desc = get_vm_region_desc(attrs); + k_spinlock_key_t key; + int ret; + + desc |= phys; + + key = k_spin_lock(&vm_xlat_lock); + + /* size aligned to page size */ + size = ALIGN_TO_PAGE(size); + __ASSERT(((virt | phys | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0, + "address/size are not page aligned\n"); + ret = vm_set_mapping(ptables, virt, size, desc, may_overwrite, vmid); + + k_spin_unlock(&vm_xlat_lock, key); + return ret; +} + +static int vm_remove_map(struct arm_mmu_ptables *ptables, const char *name, + uintptr_t virt, size_t size, uint32_t vmid) +{ + k_spinlock_key_t key; + int ret = 0; + + key = k_spin_lock(&vm_xlat_lock); + vm_del_mapping(ptables->base_xlat_table, virt, size, BASE_XLAT_LEVEL, vmid); + k_spin_unlock(&vm_xlat_lock,key); + return ret; +} + +int arch_mmap_vpart_to_block(uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs) +{ + int ret; + ARG_UNUSED(ret); + uintptr_t dest_virt = virt; + + arch_vm_mmap_pre(dest_virt, phys, size, attrs); + return 0; +} + +int arch_unmap_vpart_to_block(uintptr_t virt, size_t size) +{ + uintptr_t dest_virt = virt; + ARG_UNUSED(dest_virt); + + return 0; +} + +int arch_vm_dev_domain_unmap(uint64_t pbase, uint64_t vbase, uint64_t size, char *name, uint16_t vmid, struct arm_mmu_ptables *ptables) +{ + return vm_remove_dev_map(ptables, name, pbase, vbase, size, 0, vmid); +} + +int arch_vm_dev_domain_map(uint64_t pbase, uint64_t vbase, uint64_t size, char *name, uint16_t vmid, struct arm_mmu_ptables *ptables) +{ + uint32_t mem_attrs; + + mem_attrs = MT_DEVICE_nGnRnE | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE; + return vm_add_dev_map(ptables, name, pbase, vbase, size, mem_attrs | MT_NO_OVERWRITE, vmid); +} + +int arch_vm_mem_domain_partition_add(struct k_mem_domain *domain, + uint32_t partition_id, uintptr_t phys_start, uint32_t vmid) +{ + struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables; + struct k_mem_partition *ptn = &domain->partitions[partition_id]; + ZVM_LOG_INFO("PART_ADD: phys_start 0x%lx, virt_start 0x%lx, size 0x%lx. 
\n", phys_start, ptn->start, ptn->size); + return vm_add_map(domain_ptables, "vm-mmio-space", phys_start, + ptn->start, ptn->size, ptn->attr.attrs, vmid); +} + +int arch_vm_mem_domain_partition_remove(struct k_mem_domain *domain, + uint32_t partition_id, uint32_t vmid) +{ + int ret; + struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables; + struct k_mem_partition *ptn = &domain->partitions[partition_id]; + ZVM_LOG_INFO("PART_ADD: virt_start 0x%lx, size 0x%lx. \n", ptn->start, ptn->size); + ret = vm_remove_map(domain_ptables, "vm-mmio-space", ptn->start, ptn->size, vmid); + return ret; +} + +void arch_vm_mem_domain_partitions_clean(struct k_mem_domain *domain, + uint32_t partitions_num, uint32_t vmid) +{ + k_spinlock_key_t key; + uint32_t p_idx; + + ARG_UNUSED(domain); + + key = k_spin_lock(&vm_xlat_lock); + for(p_idx = 0; p_idx < partitions_num; p_idx++){ + vm_xlat_use_count[vmid][p_idx] = 0; + } + k_spin_unlock(&vm_xlat_lock,key); + +} + +int arch_vm_mem_domain_init(struct k_mem_domain *domain, uint32_t vmid) +{ + struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables; + k_spinlock_key_t key; + + key = k_spin_lock(&vm_xlat_lock); + domain_ptables->base_xlat_table = vm_new_table(vmid); + k_spin_unlock(&vm_xlat_lock, key); + if (!domain_ptables->base_xlat_table) { + return -ENOMEM; + } + return 0; +} diff --git a/arch/arm64/core/zvm/switch.c b/arch/arm64/core/zvm/switch.c new file mode 100644 index 00000000000000..76cfcf7847896e --- /dev/null +++ b/arch/arm64/core/zvm/switch.c @@ -0,0 +1,578 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +#define BIT_MASK0(last, first) \ + ((0xffffffffffffffffULL >> (64 - ((last) + 1 - (first)))) << (first)) +#define GET_FIELD(value, last, first) \ + (((value) & BIT_MASK0((last), (first))) >> (first)) + +static uint64_t wzr_reg = 0; + +/* VM entry function */ +extern int guest_vm_entry(struct z_vcpu *vcpu,struct zvm_vcpu_context *context); + + +static uint64_t get_fault_ipa(uint64_t hpfar_el2, uint64_t far_el2) +{ + uint64_t fault_ipa; + fault_ipa = hpfar_el2 & HPFAR_EL2_MASK; + fault_ipa = (fault_ipa >> HPFAR_EL2_SHIFT) << HPFAR_EL2_PAGE_SHIFT; + fault_ipa |= far_el2 & HPFAR_EL2_PAGE_MASK; + + return fault_ipa; +} + + +static int handle_ftrans_desc(int iss_dfsc, uint64_t pa_addr, + struct esr_dabt_area *dabt, arch_commom_regs_t *regs) +{ + int ret = 0; + struct z_vcpu *vcpu = _current_vcpu; + uint64_t esr_elx = vcpu->arch->fault.esr_el2; + uint16_t reg_index = dabt->srt; + uint64_t *reg_value; + reg_value = find_index_reg(reg_index, regs); + if (reg_value == NULL) { + reg_value = &wzr_reg; + } + + /* check that if it is a device memory fault */ + ret = handle_vm_device_emulate(vcpu->vm, pa_addr); + if(ret){ + /* pci initial sucessful. */ + if(ret > 0){ + return 0; + } + reg_value = find_index_reg(reg_index, regs); + *reg_value = 0xfefefefefefefefe; + ZVM_LOG_ERR("Unable to handle Date abort in address: 0x%llx ! \n", pa_addr); + ZVM_LOG_ERR("A stage-2 translation table need to set for this device address 0x%llx.\n", pa_addr); + /** + * if the device is allocated, whether it can be emulated + * by virtIO? 
+ */ + }else{ + ret = vm_mem_domain_partitions_add(vcpu->vm->vmem_domain); + vcpu->arch->ctxt.regs.pc -= (GET_ESR_IL(esr_elx)) ? 4 : 2; + } + + return ret; +} + +static int handle_faccess_desc(int iss_dfsc, uint64_t pa_addr, + struct esr_dabt_area *dabt, arch_commom_regs_t *regs) +{ + int ret; + uint8_t size; + uint16_t reg_index = dabt->srt; + uint16_t iss_isv, iss_sas; + uint64_t addr = pa_addr, *reg_value; + + iss_isv = dabt->isv; + if (!iss_isv) { + ZVM_LOG_WARN("Instruction syndrome not valid\n"); + return -EFAULT; + } + + reg_value = find_index_reg(reg_index, regs); + if (reg_value == NULL) { + reg_value = &wzr_reg; + } + + iss_sas = dabt->sas; + switch (iss_sas) { + case ISS_SAS_8BIT: + size = 1; + break; + case ISS_SAS_16BIT: + size = 2; + break; + case ISS_SAS_32BIT: + size = 4; + break; + case ISS_SAS_64BIT: + size = 8; + break; + default: + ZVM_LOG_WARN("unsupport data size\n"); + return -EFAULT; + } + + ret = vdev_mmio_abort(regs, dabt->wnr, addr, reg_value, size); + if (ret < 0) { + ZVM_LOG_WARN("Handle mmio read/write failed! The addr: %llx \n", addr); + return -ENODEV; + } + return ret; +} + +static int cpu_unknwn_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + ZVM_LOG_WARN("Unknow sync type! \n "); + return 0; +} + +static int cpu_wfi_wfe_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + uint32_t condition, esr_iss; + struct z_vcpu *vcpu = _current_vcpu; + + esr_iss = GET_ESR_ISS(esr_elx); + if(esr_iss & BIT(ESR_ISS_CV_SHIFT)){ + condition = GET_ESR_ISS_COND(esr_elx); + if((condition & 0x1) && (condition != 0xf)){ + return -ESRCH; + } + }else{ + /* TODO: support aarch32 VM.*/ + return -ESRCH; + } + /* WFE */ + if(esr_iss & 0x01){ + if(vcpu->vcpu_state == _VCPU_STATE_RUNNING){ + vm_vcpu_ready(vcpu); + } + }else{ /* WFI */ + vcpu_wait_for_irq(vcpu); + } + + return 0; +} + +static int cpu_dmcr_mrc_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_dmcrr_mrrc_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_simd_fp_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_il_exe_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_hvc64_sync(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + int ret = 0; + unsigned long hvc_imm; + + hvc_imm = GET_FIELD((esr_elx), 15, 0); + /*hvc_imm != 0 means that it is not a psci hvc.*/ + if(hvc_imm) { + ZVM_LOG_WARN("HVC instruction is not a psci call! 
\n"); + return ret; + } + + ret = do_psci_call(vcpu, arch_ctxt); + + return ret; +} + +static int cpu_system_msr_mrs_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + uint32_t reg_index, reg_name; + uint32_t this_esr = esr_elx; + uint64_t *reg_value ; + struct esr_sysreg_area *esr_sysreg = (struct esr_sysreg_area *)&this_esr; + struct z_vcpu *vcpu = _current_vcpu; + + reg_index = esr_sysreg->rt; + /* the operation is write */ + if (!esr_sysreg->dire) { + reg_value = find_index_reg(reg_index, arch_ctxt); + } + + reg_name = this_esr & ESR_SYSINS_REGS_MASK; + switch (reg_name) { + /* supporte sgi related register here */ + case ESR_SYSINSREG_SGI0R_EL1: + case ESR_SYSINSREG_SGI1R_EL1: + case ESR_SYSINSREG_ASGI1R_EL1: + if (!esr_sysreg->dire) { + vgicv3_raise_sgi(vcpu, *reg_value); + } + break; + case ESR_SYSINSREG_CNTPCT_EL0: + /* The process for VM's timer, emulate timer register access */ + case ESR_SYSINSREG_CNTP_TVAL_EL0: + simulate_timer_cntp_tval(vcpu, esr_sysreg->dire, reg_value); + break; + case ESR_SYSINSREG_CNTP_CTL_EL0: + simulate_timer_cntp_ctl(vcpu, esr_sysreg->dire, reg_value); + break; + case ESR_SYSINSREG_CNTP_CVAL_EL0: + simulate_timer_cntp_cval(vcpu, esr_sysreg->dire, reg_value); + break; + + default: + ZVM_LOG_WARN("Can not emulate provided register here, the register is 0x%x \n", reg_name); + return -ENODEV; + break; + } + + return 0; +} + +static int cpu_inst_abort_low_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + uint64_t ipa_ddr; + ipa_ddr = get_fault_ipa(read_hpfar_el2(), read_far_el2()); + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_inst_abort_cur_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_misaligned_pc_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_data_abort_low_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + int ret, iss_dfsc; + uint64_t ipa_ddr; + uint64_t db_esr = esr_elx; + struct esr_dabt_area *dabt = (struct esr_dabt_area *)&db_esr; + + iss_dfsc = dabt->dfsc & ~(0x3); + ipa_ddr = get_fault_ipa(read_hpfar_el2(), read_far_el2()); + + switch (iss_dfsc) { + /* translation fault level0-3*/ + case DFSC_FT_TRANS_L3: + case DFSC_FT_TRANS_L2: + case DFSC_FT_TRANS_L1: + case DFSC_FT_TRANS_L0: + ret = handle_ftrans_desc(iss_dfsc, ipa_ddr, dabt, arch_ctxt); + break; + /* access fault level0-3*/ + case DFSC_FT_ACCESS_L3: + case DFSC_FT_ACCESS_L2: + case DFSC_FT_ACCESS_L1: + case DFSC_FT_ACCESS_L0: + ret = handle_faccess_desc(iss_dfsc, ipa_ddr, dabt, arch_ctxt); + break; + /* premission fault level0-3*/ + case DFSC_FT_PERM_L3: + case DFSC_FT_PERM_L2: + case DFSC_FT_PERM_L1: + case DFSC_FT_PERM_L0: + default: + ZVM_LOG_WARN("Stage-2 error without translation fault: %016llx ! VM stop! 
\n", ipa_ddr); + ret = -ENODEV; + break; + } + + return ret; +} + +static int cpu_data_abort_cur_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int cpu_misaligned_sp_sync(arch_commom_regs_t *arch_ctxt, uint64_t esr_elx) +{ + ARG_UNUSED(arch_ctxt); + ARG_UNUSED(esr_elx); + return 0; +} + +static int arch_vm_trap_sync(struct z_vcpu *vcpu) +{ + int err = 0; + uint64_t esr_elx; + arch_commom_regs_t *arch_ctxt; + + esr_elx = vcpu->arch->fault.esr_el2; + arch_ctxt = &vcpu->arch->ctxt.regs; + switch (GET_ESR_EC(esr_elx)) { + case 0b000000: /* 0x00: "Unknown reason" */ + err = cpu_unknwn_sync(arch_ctxt, esr_elx); + break; + case 0b000001: /* 0x01: "Trapped WFI or WFE instruction execution" */ + err = cpu_wfi_wfe_sync(arch_ctxt, esr_elx); + break; + case 0b000011: /* 0x03: "Trapped MCR or MRC access */ + err = cpu_dmcr_mrc_sync(arch_ctxt, esr_elx); + break; + case 0b000100: /* 0x04: "Trapped MCRR or MRRC access */ + err = cpu_dmcrr_mrrc_sync(arch_ctxt, esr_elx); + break; + case 0b000101: /* 0x05 */ + case 0b000110: /* 0x06 */ + goto handler_failed; + break; + case 0b000111: /* 0x07: "Trapped access to SVE, Advanced SIMD, or + floating-point functionality" */ + err = cpu_simd_fp_sync(arch_ctxt, esr_elx); + break; + case 0b001100: /* 0x0c */ + case 0b001101: /* 0x0d */ + goto handler_failed; + break; + case 0b001110: /* 0x0e: "Illegal Execution state" */ + err = cpu_il_exe_sync(arch_ctxt, esr_elx); + break; + case 0b010001: /* 0x11 */ + goto handler_failed; + break; + case 0b010110: /* 0x16: "HVC instruction execution in AArch64 state" */ + err = cpu_hvc64_sync(vcpu, arch_ctxt, esr_elx); + break; + case 0b011000: /* 0x18: "Trapped MSR, MRS or System instruction execution in + AArch64 state */ + err = cpu_system_msr_mrs_sync(arch_ctxt, esr_elx); + break; + case 0b011001: /* 0x19 */ + goto handler_failed; + break; + case 0b100000: /* 0x20: "Instruction Abort from a lower Exception level, that + might be using AArch32 or AArch64" */ + err = cpu_inst_abort_low_sync(arch_ctxt, esr_elx); + break; + case 0b100001: /* 0x21: "Instruction Abort taken without a change in Exception level." */ + err = cpu_inst_abort_cur_sync(arch_ctxt, esr_elx); + break; + case 0b100010: /* 0x22: "PC alignment fault exception." */ + err = cpu_misaligned_pc_sync(arch_ctxt, esr_elx); + break; + case 0b100100: /* 0x24: "Data Abort from a lower Exception level, that might + be using AArch32 or AArch64" */ + err = cpu_data_abort_low_sync(arch_ctxt, esr_elx); + break; + case 0b100101: /* 0x25: "Data Abort taken without a change in Exception level" */ + err = cpu_data_abort_cur_sync(arch_ctxt, esr_elx); + break; + case 0b100110: /* 0x26: "SP alignment fault exception" */ + err = cpu_misaligned_sp_sync(arch_ctxt, esr_elx); + break; + case 0b101000: /* 0x28 */ + case 0b101100: /* 0x2c */ + case 0b101111: /* 0x2f */ + case 0b110000: /* 0x30 */ + default: + goto handler_failed; + } + + if (GET_ESR_EC(esr_elx) != 0b010110) + vcpu->arch->ctxt.regs.pc += (GET_ESR_IL(esr_elx)) ? 4 : 2; + return err; + +handler_failed: + ZVM_LOG_WARN("ZVM do not support this exit code: %lld. 
\n", GET_ESR_EC(esr_elx)); + return -ENODEV; +} + +static void vm_disable_daif(void) +{ + disable_debug_exceptions(); + disable_serror_exceptions(); + disable_fiq(); + disable_irq(); +} + +static void vm_enable_daif(void) +{ + enable_debug_exceptions(); + enable_fiq(); + enable_serror_exceptions(); + enable_irq(); +} + +static int vm_flush_vgic(struct z_vcpu *vcpu) +{ + int ret = 0; + + ret = virt_irq_flush_vgic(vcpu); + if (ret) { + ZVM_LOG_ERR("Flush vgic info failed, Unknow reason \n"); + } + return ret; +} + +static int vm_sync_vgic(struct z_vcpu *vcpu) +{ + int ret = 0; + + ret = virt_irq_sync_vgic(vcpu); + if (ret) { + ZVM_LOG_ERR("Sync vgic info failed, Unknow reason \n"); + } + return ret; +} + +static int arch_vm_irq_trap(struct z_vcpu *vcpu) +{ + ARG_UNUSED(vcpu); + vm_enable_daif(); + return 0; +} + +static void arch_vm_serror_trap(struct z_vcpu *vcpu, int exit_code) +{ + uint64_t disr; + uint64_t esr; + + if (ARM_VM_SERROR_PENDING(exit_code)) { + disr = vcpu->arch->fault.disr_el1; + + esr = (0x2f << 26); + if(disr & BIT(24)) + esr |= (disr & ((1<<25) - 1)); + else + esr |= (disr & (0x7<<10 | 0x1<<9 | 0x3f)); + } +} + +int arch_vcpu_run(struct z_vcpu *vcpu) +{ + int ret; + uint16_t exit_type = 0; + + /* mask all interrupt here to disable interrupt */ + vm_disable_daif(); + ret = vm_flush_vgic(vcpu); + if (ret) { + return ret; + } + + if(vcpu->vm->reboot){ + vcpu_sysreg_load(vcpu); + vcpu->vm->reboot=false; + } + + switch_to_guest_sysreg(vcpu); + + /* Jump to the fire too! */ + exit_type = guest_vm_entry(vcpu, &vcpu->arch->host_ctxt); + vcpu->exit_type = exit_type; + + switch_to_host_sysreg(vcpu); + + vm_sync_vgic(vcpu); + switch (exit_type) { + case ARM_VM_EXCEPTION_SYNC: + ret = arch_vm_trap_sync(vcpu); + break; + case ARM_VM_EXCEPTION_IRQ: + ret = arch_vm_irq_trap(vcpu); + break; + case ARM_VM_EXCEPTION_SERROR: + arch_vm_serror_trap(vcpu, exit_type); + ZVM_LOG_WARN("SError exception type in this stage....\n"); + break; + case ARM_VM_EXCEPTION_IRQ_IN_SYNC: + ret = arch_vm_irq_trap(vcpu); + break; + default: + ZVM_LOG_WARN("Unsupported exception....\n Exit code: 0x%08llx \t exit_type: 0x%08x ....\n", read_esr_el2(), exit_type); + return -ESRCH; + } + + return ret; +} + +bool zvm_switch_handle_pre(uint32_t irq) +{ + struct k_thread *thread; + struct z_vcpu *vcpu; + + if( (vcpu = _current_vcpu) == NULL){ + return false; + } + + /* If it is a vcpu thread, judge whether the signal is send to it */ + if(!vcpu->vm->vm_irq_block.irq_bitmap[irq]){ + return false; + } + + thread = vcpu->work->vcpu_thread; + thread->base.thread_state |= _THREAD_VCPU_NO_SWITCH; + + return true; +} + +uint64_t get_zvm_host_context(void) +{ + struct k_thread *thread = _current; + struct z_vcpu *vcpu = thread->vcpu_struct; + + if (!vcpu) { + return 0; + } + return (uint64_t)&(vcpu->arch->host_ctxt); +} + +void* z_vm_lower_sync_handler(uint64_t esr_elx) +{ + struct z_vcpu *vcpu = _current_vcpu; + if (vcpu == NULL) { + ZVM_LOG_WARN("EL2 sync occur, get vcpu struct failed "); + } + + vcpu->arch->fault.esr_el2 = esr_elx; + return (void*)vcpu; +} + +void* z_vm_lower_irq_handler(struct arch_esf *esf_ctxt) +{ + ARG_UNUSED(esf_ctxt); + struct z_vcpu *vcpu = _current_vcpu; + if (vcpu == NULL) { + ZVM_LOG_WARN("EL2 irq occur, get vcpu struct failed "); + } + + return (void *)vcpu; +} diff --git a/arch/arm64/core/zvm/timer.c b/arch/arm64/core/zvm/timer.c new file mode 100644 index 00000000000000..711c0f18b99f8e --- /dev/null +++ b/arch/arm64/core/zvm/timer.c @@ -0,0 +1,394 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi 
Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +#define VIRT_VTIMER_NAME arm_arch_timer +struct zvm_arch_timer_info { + uint32_t virt_irq; + uint32_t phys_irq; +}; + +/* Global timer info */ +static struct zvm_arch_timer_info zvm_global_vtimer_info; +static struct k_spinlock virt_vtimer_lock; +static struct k_spinlock virt_ptimer_lock; + +/** + * @brief Initializes _timeout struct for virtual timer + */ +static inline void init_virt_timer_timeout(struct _timeout *timeout, void *func) +{ + timeout->dticks = 0; + timeout->fn = func; + sys_dnode_init(&timeout->node); +} + +/** + * @brief Get the global timer information and pass it on vcpu. + */ +static inline void get_global_timer_info(struct virt_timer_context *vtimer_ctxt) +{ + vtimer_ctxt->virt_virq = zvm_global_vtimer_info.virt_irq; + vtimer_ctxt->virt_pirq = zvm_global_vtimer_info.phys_irq; +} + +/** + * @brief Virtual vtimer isr function for process irq. + */ +static int arm_arch_virt_vtimer_compare_isr(void *dev) +{ + ARG_UNUSED(dev); + int ret; + uint32_t cntvctl; + k_spinlock_key_t key = k_spin_lock(&virt_vtimer_lock); + struct z_vcpu *vcpu = _current_vcpu; + struct virt_timer_context *ctxt = vcpu->arch->vtimer_context; + + cntvctl = read_cntv_ctl_el02(); + if(!(cntvctl & CNTV_CTL_ISTAT_BIT)){ + ZVM_LOG_WARN("No virt vtimer interrupt but signal raise! \n"); + return -EINTR; + } + ctxt->cntv_ctl = cntvctl | CNTV_CTL_IMASK_BIT; + + ret = set_virq_to_vcpu(vcpu, ctxt->virt_virq); + if(ret) { + k_spin_unlock(&virt_vtimer_lock, key); + ZVM_LOG_WARN("Set vtimer irq to vm failed! \n"); + return ret; + } + + k_spin_unlock(&virt_vtimer_lock, key); + + return 0; +} + +/** + * @brief Virtual ptimer isr function for process irq. + */ +static int arm_arch_virt_ptimer_compare_isr(void *dev) +{ + ARG_UNUSED(dev); + int ret; + k_spinlock_key_t key = k_spin_lock(&virt_ptimer_lock); + struct z_vcpu *vcpu = _current_vcpu; + struct virt_timer_context *ctxt = vcpu->arch->vtimer_context; + + ret = set_virq_to_vcpu(vcpu, ctxt->virt_pirq); + if(ret){ + k_spin_unlock(&virt_ptimer_lock, key); + return ret; + } + + k_spin_unlock(&virt_ptimer_lock, key); + + return 0; +} + +/** + * @brief Processing virtual vtimer timeout for vm. + */ +static void virt_vtimer_expiry(struct _timeout *t) +{ + int virq_num = zvm_global_vtimer_info.virt_irq; + struct virt_timer_context *ctxt; + struct z_vcpu *vcpu; + + ctxt = CONTAINER_OF(t, struct virt_timer_context, vtimer_timeout); + if(ctxt == NULL){ + ZVM_LOG_WARN("The virt_vtimer context is not exist! \n"); + return; + } + + ctxt->cntv_ctl |= CNTV_CTL_IMASK_BIT; + vcpu = (struct z_vcpu*)ctxt->vcpu; + + set_virq_to_vcpu(vcpu, virq_num); +} + +/** + * @brief Processing virtual ptimer timeout for vm. + */ +static void virt_ptimer_expiry(struct _timeout *t) +{ + int virq_num = zvm_global_vtimer_info.phys_irq; + struct virt_timer_context *ctxt; + struct z_vcpu *vcpu; + + ctxt = CONTAINER_OF(t, struct virt_timer_context, ptimer_timeout); + if(ctxt == NULL){ + ZVM_LOG_WARN("The virt_ptimer context is not exist! 
\n"); + return; + } + + ctxt->cntp_ctl |= CNTV_CTL_IMASK_BIT; + vcpu = (struct z_vcpu*)ctxt->vcpu; + + set_virq_to_vcpu(vcpu, virq_num); +} + +/** + * @brief Simulate cntp_tval_el0 register + */ +void simulate_timer_cntp_tval(struct z_vcpu *vcpu, int read, uint64_t *value) +{ + uint64_t cycles; + struct virt_timer_context *ctxt; + + ctxt = vcpu->arch->vtimer_context; + cycles = arm_arch_timer_count() - ctxt->timer_offset; + + if (read) { +#ifdef CONFIG_HAS_ARM_VHE + *value = read_cntp_tval_el02(); +#else + uint64_t ns; + ns = (ctxt->cntp_tval - cycles - ctxt->timer_offset) & 0xffffffff; + *value = ns; +#endif + } else { +#ifdef CONFIG_HAS_ARM_VHE + write_cntp_tval_el02(*value); +#else + ctxt->cntp_cval = arm_arch_timer_count() + *value; +#endif + } +} + +/** + * @brief Simulate cntp_cval_el0 register + */ +void simulate_timer_cntp_cval(struct z_vcpu *vcpu, int read, uint64_t *value) +{ + unsigned long ns; + k_timeout_t vticks; + struct virt_timer_context *ctxt; + + ctxt = vcpu->arch->vtimer_context; + + if (read) { +#ifdef CONFIG_HAS_ARM_VHE + *value = read_cntp_cval_el02(); +#else + *value = ctxt->cntp_cval; +#endif + } else { +#ifdef CONFIG_HAS_ARM_VHE + ARG_UNUSED(ns); + ARG_UNUSED(vticks); + write_cntp_cval_el02(*value); + ctxt->cntp_cval = read_cntp_cval_el02(); +#else + ctxt->cntp_cval = *value + ctxt->timer_offset; + if (ctxt->cntp_ctl & CNTV_CTL_ENABLE_BIT) { + ctxt->cntp_ctl &= ~CNTV_CTL_ISTAT_BIT; + vticks.ticks = (ctxt->cntp_cval + ctxt->timer_offset)/HOST_CYC_PER_TICK; + z_add_timeout(&ctxt->ptimer_timeout, ctxt->ptimer_timeout.fn, vticks); + } +#endif + } +} + +/** + * @brief Simulate cntp_ctl register + */ +void simulate_timer_cntp_ctl(struct z_vcpu *vcpu, int read, uint64_t *value) +{ + uint32_t reg_value = (uint32_t)(*value); + k_timeout_t vticks; + struct virt_timer_context *ctxt; + + ctxt = vcpu->arch->vtimer_context; + + if (read) { +#ifdef CONFIG_HAS_ARM_VHE + ARG_UNUSED(reg_value); + ARG_UNUSED(vticks); + *value = read_cntp_ctl_el02(); +#else + *value = ctxt->cntp_ctl; +#endif + } else { +#ifdef CONFIG_HAS_ARM_VHE + write_cntp_ctl_el02(*value); + ctxt->cntp_ctl = read_cntp_ctl_el02(); + /* TODO: Add softirq support*/ +#else + reg_value &= ~CNTV_CTL_ISTAT_BIT; + + if (reg_value & CNTV_CTL_ENABLE_BIT) + reg_value |= ctxt->cntp_ctl & CNTV_CTL_ISTAT_BIT; + ctxt->cntp_ctl = reg_value; + + if ((ctxt->cntp_ctl & CNTV_CTL_ENABLE_BIT) && (ctxt->cntp_cval != 0)) { + vticks.ticks = (ctxt->cntp_cval + ctxt->timer_offset)/HOST_CYC_PER_TICK; + z_add_timeout(&ctxt->ptimer_timeout, ctxt->ptimer_timeout.fn, vticks); + } +#endif + } +} + +/** + * @brief Initializes the virtual timer context for the vcpu: + * This needs to be done when the vcpu is created. The step is below: + * 1. Init vtimer and ptimer register. + * 2. Add a timer expiry function for vcpu. + * 3. Add a callbak function. 
+ */ +int arch_vcpu_timer_init(struct z_vcpu *vcpu) +{ + bool *bit_map; + struct virt_timer_context *ctxt; + struct vcpu_arch *arch = vcpu->arch; + struct virt_irq_desc *irq_desc; + + arch->vtimer_context = (struct virt_timer_context *)k_malloc(sizeof(struct virt_timer_context)); + if(!arch->vtimer_context) { + ZVM_LOG_ERR("Init vcpu_arch->vtimer failed"); + return -ENXIO; + } + + /* Default vcpu, get the count as offset */ + if (vcpu->vcpu_id == 0) { + vcpu->vm->vtimer_offset = arm_arch_timer_count(); + } + + ctxt = vcpu->arch->vtimer_context; + ctxt->vcpu = vcpu; + ctxt->timer_offset = vcpu->vm->vtimer_offset; + ctxt->enable_flag = false; + + /* init virt_timer struct */ + ctxt->cntv_ctl = CNTV_CTL_IMASK_BIT; + ctxt->cntv_cval = 0; + ctxt->cntv_tval = 0; + ctxt->cntp_ctl = CNTV_CTL_IMASK_BIT; + ctxt->cntp_cval = 0; + ctxt->cntp_tval = 0; + + /* get virt timer irq */ + get_global_timer_info(ctxt); + + init_virt_timer_timeout(&ctxt->vtimer_timeout, virt_vtimer_expiry); + init_virt_timer_timeout(&ctxt->ptimer_timeout, virt_ptimer_expiry); + + /*El1 physical and virtual timer. */ + bit_map = vcpu->vm->vm_irq_block.irq_bitmap; + bit_map[ctxt->virt_virq] = true; + bit_map[ctxt->virt_pirq] = true; + + /*Make VM directly access virt timer register.*/ + irq_desc = vgic_get_virt_irq_desc(vcpu, ctxt->virt_virq); + irq_desc->virq_flags |= VIRQ_HW_FLAG; + + return 0; +} + +int arch_vcpu_timer_deinit(struct z_vcpu *vcpu) +{ + ARG_UNUSED(vcpu); + uint64_t cnt_ctl; + + cnt_ctl = read_cntv_ctl_el02(); + cnt_ctl &= ~CNTV_CTL_ENABLE_BIT; + write_cntv_ctl_el02(cnt_ctl); + + cnt_ctl = read_cntp_ctl_el02(); + cnt_ctl &= ~CNTP_CTL_ENABLE_BIT; + write_cntp_ctl_el02(cnt_ctl); + + return 0; +} + +static void virt_arm_ptimer_init(void) +{ + uint64_t cntp_ctl; + + IRQ_CONNECT(ARM_ARCH_VIRT_PTIMER_IRQ, ARM_ARCH_VIRT_PTIMER_PRIO, + arm_arch_virt_ptimer_compare_isr, NULL, ARM_ARCH_VIRT_PTIMER_FLAGS); + /* disable ptimer for vm */ +#if defined(CONFIG_HAS_ARM_VHE) + cntp_ctl = read_cntp_ctl_el02(); + cntp_ctl &= ~CNTP_CTL_ENABLE_BIT; + write_cntp_ctl_el02(cntp_ctl); +#endif +} + +static void virt_arm_vtimer_init(void) +{ + uint64_t cntv_ctl; + + IRQ_CONNECT(ARM_ARCH_VIRT_VTIMER_IRQ, ARM_ARCH_VIRT_VTIMER_PRIO, + arm_arch_virt_vtimer_compare_isr, NULL, ARM_ARCH_VIRT_VTIMER_FLAGS); + /* disable vtimer for vm */ +#if defined(CONFIG_HAS_ARM_VHE) + cntv_ctl = read_cntv_ctl_el02(); + cntv_ctl &= ~CNTV_CTL_ENABLE_BIT; + write_cntv_ctl_el02(cntv_ctl); +#endif +} + +static int virt_arm_arch_timer_init(void) +{ + /* get vtimer irq */ + zvm_global_vtimer_info.virt_irq = ARM_ARCH_VIRT_VTIMER_IRQ; + zvm_global_vtimer_info.phys_irq = ARM_ARCH_VIRT_PTIMER_IRQ; + + if( (zvm_global_vtimer_info.virt_irq > 32) || (zvm_global_vtimer_info.virt_irq < 0)){ + ZVM_LOG_ERR("Can not get vtimer virt struct from hw. \n"); + return -EINTR; + } + if( (zvm_global_vtimer_info.phys_irq > 32) || (zvm_global_vtimer_info.phys_irq < 0)){ + ZVM_LOG_ERR("Can not get vtimer phys struct from hw. \n"); + return -EINTR; + } + + virt_arm_vtimer_init(); + virt_arm_ptimer_init(); + + return 0; +} + + +static struct virt_device_config virt_arm_arch_timer_cfg = { + .hirq_num = 0, + .device_config = NULL, +}; + +static struct virt_device_data virt_arm_arch_timer_data_port = { + .device_data = NULL, +}; + +/** + * @brief vserial device operations api. 
+*/ +static const struct virt_device_api virt_arm_arch_timer_api = { + .init_fn = NULL, + .deinit_fn = NULL, + .virt_device_read = NULL, + .virt_device_write = NULL, +}; + +ZVM_VIRTUAL_DEVICE_DEFINE(virt_arm_arch_timer_init, + POST_KERNEL, CONFIG_VIRT_ARM_ARCH_TIMER_PRIORITY, + VIRT_VTIMER_NAME, + virt_arm_arch_timer_data_port, + virt_arm_arch_timer_cfg, + virt_arm_arch_timer_api); \ No newline at end of file diff --git a/arch/arm64/include/kernel_arch_data.h b/arch/arm64/include/kernel_arch_data.h index 8b607c1dbf47d2..ffb9c294ff4a3b 100644 --- a/arch/arm64/include/kernel_arch_data.h +++ b/arch/arm64/include/kernel_arch_data.h @@ -39,6 +39,16 @@ extern "C" { typedef struct arch_esf _esf_t; typedef struct __basic_sf _basic_sf_t; +#ifdef CONFIG_ZVM +#include +#include + +typedef struct zvm_vcpu_context zvm_vcpu_context_t; +typedef struct arch_commom_regs arch_commom_regs_t; +typedef struct z_vcpu vcpu_t; +typedef struct vcpu_arch vcpu_arch_t; +#endif + #ifdef __cplusplus } #endif diff --git a/arch/arm64/include/zvm_offsets_short_arch.h b/arch/arm64/include/zvm_offsets_short_arch.h new file mode 100644 index 00000000000000..a3c522e1698e99 --- /dev/null +++ b/arch/arm64/include/zvm_offsets_short_arch.h @@ -0,0 +1,73 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZVM_ARCH_ARM64_INCLUDE_OFFSETS_SHORT_ARCH_H_ +#define ZVM_ARCH_ARM64_INCLUDE_OFFSETS_SHORT_ARCH_H_ + +#include + +/* below macro is for hyp code offset */ +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_x19_20 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_x19_x20_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_x21_x22 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_x21_x22_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_x23_x24 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_x23_x24_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_x25_x26 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_x25_x26_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_x27_x28 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_x27_x28_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_x29_sp_el0 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_x29_sp_el0_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_callee_saved_sp_elx \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_callee_saved_regs_OFFSET + \ + ___callee_saved_t_sp_elx_lr_OFFSET) + +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x0_x1 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x0_x1_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x2_x3 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x2_x3_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x4_x5 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x4_x5_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x6_x7 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x6_x7_OFFSET) +#define 
_zvm_vcpu_ctxt_arch_regs_to_esf_t_x8_x9 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x8_x9_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x10_x11 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x10_x11_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x12_x13 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x12_x13_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x14_x15 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x14_x15_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x16_x17 \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x16_x17_OFFSET) +#define _zvm_vcpu_ctxt_arch_regs_to_esf_t_x18_lr \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_esf_handle_regs_OFFSET + \ + ___esf_t_x18_lr_OFFSET) + +#define _vcpu_arch_to_ctxt \ + (__vcpu_t_arch_OFFSET + __vcpu_arch_t_ctxt_OFFSET ) + +#define _vcpu_context_t_regs_to_lr \ + (__zvm_vcpu_context_t_regs_OFFSET + __arch_commom_regs_t_lr_OFFSET ) + +#endif /* ZVM_ARCH_ARM64_INCLUDE_OFFSETS_SHORT_ARCH_H_ */ diff --git a/boards/deprecated.cmake b/boards/deprecated.cmake index f1a40a8b318726..8fb30e4165b48e 100644 --- a/boards/deprecated.cmake +++ b/boards/deprecated.cmake @@ -686,6 +686,9 @@ set(qemu_cortex_a53_smp_DEPRECATED set(qemu_cortex_a53_xip_DEPRECATED qemu_cortex_a53/qemu_cortex_a53/xip ) +set(qemu_max_smp_DEPRECATED + qemu_max/qemu_max/smp +) set(qemu_malta_be_DEPRECATED qemu_malta/qemu_malta/be ) diff --git a/boards/qemu/max/Kconfig b/boards/qemu/max/Kconfig new file mode 100644 index 00000000000000..0e10f4655f5cdc --- /dev/null +++ b/boards/qemu/max/Kconfig @@ -0,0 +1,3 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/boards/qemu/max/Kconfig.defconfig b/boards/qemu/max/Kconfig.defconfig new file mode 100644 index 00000000000000..1f7a0da922e42a --- /dev/null +++ b/boards/qemu/max/Kconfig.defconfig @@ -0,0 +1,10 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +if BOARD_QEMU_MAX + +config BUILD_OUTPUT_BIN + default y + +endif # BOARD_QEMU_MAX diff --git a/boards/qemu/max/Kconfig.qemu_max b/boards/qemu/max/Kconfig.qemu_max new file mode 100644 index 00000000000000..551adaecbad1d7 --- /dev/null +++ b/boards/qemu/max/Kconfig.qemu_max @@ -0,0 +1,6 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +config BOARD_QEMU_MAX + select SOC_QEMU_MAX diff --git a/boards/qemu/max/board.cmake b/boards/qemu/max/board.cmake new file mode 100644 index 00000000000000..ffaa97f0f92d8e --- /dev/null +++ b/boards/qemu/max/board.cmake @@ -0,0 +1,22 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +set(SUPPORTED_EMU_PLATFORMS qemu) +set(QEMU_ARCH aarch64) + +set(QEMU_CPU_TYPE_${ARCH} max) + +if(CONFIG_ARMV8_A_NS) +set(QEMU_MACH virt,gic-version=3) +else() +set(QEMU_MACH virt,secure=on,gic-version=3) +endif() + +set(QEMU_FLAGS_${ARCH} + -cpu ${QEMU_CPU_TYPE_${ARCH}} + -nographic + -machine ${QEMU_MACH} + ) + +board_set_debugger_ifnset(qemu) diff --git a/boards/qemu/max/board.yml b/boards/qemu/max/board.yml new file mode 100644 index 00000000000000..6b0510ceb673cd --- /dev/null +++ b/boards/qemu/max/board.yml @@ -0,0 +1,7 @@ +board: + name: qemu_max + vendor: arm + socs: + - name: qemu_max + variants: + - name: smp diff --git a/boards/qemu/max/qemu_max.dts b/boards/qemu/max/qemu_max.dts new file mode 100644 index 00000000000000..4cb04ae53b160f --- /dev/null +++ b/boards/qemu/max/qemu_max.dts @@ -0,0 +1,39 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/dts-v1/; +#include + +/ { + model = "QEMU MAX"; + compatible = "qemu,arm-max"; + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; + + chosen { + zephyr,sram = &sram0; + zephyr,console = &uart0; + zephyr,shell-uart = &uart0; + zephyr,flash = &flash0; + }; + + soc { + sram0: memory@40000000 { + compatible = "mmio-sram"; + reg = <0x0 0x40000000 0x0 DT_SIZE_M(512)>; + }; + }; + +}; + +&uart0 { + status = "okay"; + current-speed = <115200>; +}; diff --git a/boards/qemu/max/qemu_max.yaml b/boards/qemu/max/qemu_max.yaml new file mode 100644 index 00000000000000..e220723c32ed56 --- /dev/null +++ b/boards/qemu/max/qemu_max.yaml @@ -0,0 +1,15 @@ +identifier: qemu_max +name: QEMU Emulation for MAX (ARM) +type: qemu +simulation: qemu +arch: arm64 +toolchain: + - zephyr + - cross-compile +ram: 128 +testing: + default: true + ignore_tags: + - net + - bluetooth +vendor: arm \ No newline at end of file diff --git a/boards/qemu/max/qemu_max_defconfig b/boards/qemu/max/qemu_max_defconfig new file mode 100644 index 00000000000000..2e930ceff813db --- /dev/null +++ b/boards/qemu/max/qemu_max_defconfig @@ -0,0 +1,18 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +# Basic board infomation +CONFIG_ARM_ARCH_TIMER=y +CONFIG_QEMU_ICOUNT=n + +CONFIG_ARMV8_A_NS=y +CONFIG_CACHE_MANAGEMENT=y +CONFIG_TIMEOUT_64BIT=y +CONFIG_ARM64_SET_VMPIDR_EL2=y +CONFIG_ARM64_SET_VPIDR_EL2=y + +# Enable UART driver +CONFIG_SERIAL=y +CONFIG_UART_PL011=y +CONFIG_UART_INTERRUPT_DRIVEN=y diff --git a/boards/qemu/max/qemu_max_qemu_max_smp.dts b/boards/qemu/max/qemu_max_qemu_max_smp.dts new file mode 100644 index 00000000000000..57fe21264ce7ea --- /dev/null +++ b/boards/qemu/max/qemu_max_qemu_max_smp.dts @@ -0,0 +1,8 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "qemu_max.dts" diff --git a/boards/qemu/max/qemu_max_qemu_max_smp.yaml b/boards/qemu/max/qemu_max_qemu_max_smp.yaml new file mode 100644 index 00000000000000..41f2fddf8bdabd --- /dev/null +++ b/boards/qemu/max/qemu_max_qemu_max_smp.yaml @@ -0,0 +1,17 @@ +identifier: qemu_max/qemu_max/smp +name: QEMU Emulation for MAX SMP (ARM) +type: qemu +simulation: qemu +arch: arm64 +toolchain: + - zephyr + - cross-compile +ram: 128 +supported: + - smp +testing: + default: true + ignore_tags: + - net + - bluetooth +vendor: arm diff --git a/boards/qemu/max/qemu_max_qemu_max_smp_defconfig b/boards/qemu/max/qemu_max_qemu_max_smp_defconfig new file mode 100644 index 00000000000000..e1e9a54e7a6955 --- /dev/null +++ b/boards/qemu/max/qemu_max_qemu_max_smp_defconfig @@ -0,0 +1,28 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +# Basic board infomation +CONFIG_QEMU_ICOUNT=n + +# SMP support +CONFIG_SMP=y +CONFIG_ARMV8_A_NS=y +CONFIG_MP_MAX_NUM_CPUS=4 +CONFIG_CACHE_MANAGEMENT=y +CONFIG_TIMEOUT_64BIT=y +CONFIG_ARM64_SET_VMPIDR_EL2=y +CONFIG_ARM64_SET_VPIDR_EL2=y + +# PSCI is supported +CONFIG_PM_CPU_OPS=y +CONFIG_PM_CPU_OPS_PSCI=y + +# Enable Timer and Sys clock +CONFIG_SYS_CLOCK_TICKS_PER_SEC=1000 +CONFIG_ARM_ARCH_TIMER=y + +# Enable serial port +CONFIG_SERIAL=y +CONFIG_UART_PL011=y +CONFIG_UART_INTERRUPT_DRIVEN=y diff --git a/drivers/interrupt_controller/intc_gicv3.c b/drivers/interrupt_controller/intc_gicv3.c index e3c25c4d8b8fcc..b6cf8c933f9594 100644 --- a/drivers/interrupt_controller/intc_gicv3.c +++ b/drivers/interrupt_controller/intc_gicv3.c @@ -264,6 +264,21 @@ void arm_gic_eoi(unsigned int intid) write_sysreg(intid, ICC_EOIR1_EL1); } +#ifdef CONFIG_ZVM +void arm_gic_eoi_deactive(unsigned int intid, bool no_deactive) +{ + /** + * For PTdevice's intid of VM, write dir to this intid + * may be cause unpredictable action. And When ICC_CTLR_EL1.eoimode + * is set to '1', host os's intid must use deactive operation. 
+ */ + if(!no_deactive){ + write_sysreg(intid, ICC_DIR_EL1); + } + barrier_isync_fence_full(); +} +#endif /* CONFIG_ZVM */ + void gic_raise_sgi(unsigned int sgi_id, uint64_t target_aff, uint16_t target_list) { diff --git a/drivers/interrupt_controller/intc_gicv3_priv.h b/drivers/interrupt_controller/intc_gicv3_priv.h index 64fabe2153769a..9df568d1178d91 100644 --- a/drivers/interrupt_controller/intc_gicv3_priv.h +++ b/drivers/interrupt_controller/intc_gicv3_priv.h @@ -78,6 +78,9 @@ #define GICR_TYPER_PROCESSOR_NUMBER_SHIFT 8 #define GICR_TYPER_PROCESSOR_NUMBER_MASK 0xFFFFUL #define GICR_TYPER_PROCESSOR_NUMBER_GET(_val) MASK_GET(_val, GICR_TYPER_PROCESSOR_NUMBER) +#define GICR_TYPER_LPI_AFFINITY_SHIFT 24 +#define GICR_TYPER_LPI_AFFINITY_MASK 0x3UL +#define GICR_TYPER_LPI_AFFINITY_GET(_val) MASK_GET(_val, GICR_TYPER_AFFINITY_VALUE) /* GICR_WAKER */ #define GICR_WAKER_PS 1 @@ -114,6 +117,13 @@ #define GIC_DIST_IROUTER 0x6000 #define IROUTER(base, n) (base + GIC_DIST_IROUTER + (n) * 8) +#define GICR_SGI_CTLR 0x0000 +#define GICR_SGI_ISENABLER 0x0100 +#define GICR_SGI_ICENABLER 0x0180 +#define GICR_SGI_PENDING 0x0200 +#define GICR_SGI_ICPENDING 0x0280 +#define GICR_SGI_PIDR2 0xFFE8 + /* * ITS registers, offsets from ITS_base */ diff --git a/drivers/pm_cpu_ops/pm_cpu_ops_psci.h b/drivers/pm_cpu_ops/pm_cpu_ops_psci.h index 606071fbb07255..0e07768ad7967e 100644 --- a/drivers/pm_cpu_ops/pm_cpu_ops_psci.h +++ b/drivers/pm_cpu_ops/pm_cpu_ops_psci.h @@ -37,6 +37,10 @@ #define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5) #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) #define PSCI_0_2_FN64_SYSTEM_RESET PSCI_0_2_FN(9) +/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */ +#define PSCI_0_2_TOS_UP_MIGRATE 0 +#define PSCI_0_2_TOS_UP_NO_MIGRATE 1 +#define PSCI_0_2_TOS_MP 2 /* PSCI v1.0 interface */ #define PSCI_1_0_FN_BASE (0x84000000U) diff --git a/dts/arm64/qemu/qemu-virt-max.dtsi b/dts/arm64/qemu/qemu-virt-max.dtsi new file mode 100644 index 00000000000000..26c47c6f9ec3e8 --- /dev/null +++ b/dts/arm64/qemu/qemu-virt-max.dtsi @@ -0,0 +1,109 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +/ { + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a55"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a55"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a55"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a55"; + reg = <3>; + }; + + }; + + timer { + compatible = "arm,armv8-timer"; + interrupt-parent = <&gic>; + interrupts = , + , + , + ; + }; + + uartclk: apb-pclk { + compatible = "fixed-clock"; + clock-frequency = <24000000>; + #clock-cells = <0>; + }; + + soc { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + interrupt-parent = <&gic>; + + gic: interrupt-controller@8000000 { + compatible = "arm,gic-v3", "arm,gic"; + reg = <0x00 0x8000000 0x00 0x010000>, + <0x00 0x80a0000 0x00 0xf60000>; + interrupt-controller; + #interrupt-cells = <4>; + status = "okay"; + #size-cells = <0x02>; + #address-cells = <0x02>; + + gic_ist: its@8080000 { + compatible = "arm,gic-v3-its"; + phandle = <0x8006>; + reg = <0x00 0x8080000 0x0 0x20000>; + msi-controller; + }; + }; + + uart0: uart@9000000 { + compatible = "arm,pl011"; + reg = <0x00 0x9000000 0x00 0x1000>; + status = "disabled"; + interrupts = ; + interrupt-names = "irq_0"; + clocks = <&uartclk>; + }; + + flash0: flash@0 { + compatible = "cfi-flash"; + bank-width = <4>; + /* As this is pointed to by zephyr,flash we can only handle + * one value in the reg property, so we comment out the + * second flash bank for now + */ + reg = <0x0 0x0 0x0 DT_SIZE_M(2)>; + }; + }; +}; diff --git a/include/zephyr/arch/arm64/arch_inlines.h b/include/zephyr/arch/arm64/arch_inlines.h index 79e4690b2b1382..31168976b2375a 100644 --- a/include/zephyr/arch/arm64/arch_inlines.h +++ b/include/zephyr/arch/arm64/arch_inlines.h @@ -14,11 +14,27 @@ #include #include +#if defined(CONFIG_HAS_ARM_VHE) && defined(CONFIG_ZVM) +static ALWAYS_INLINE _cpu_t *arch_curr_cpu(void) +{ + return (_cpu_t *)(read_tpidr_el2() & TPIDRROEL0_CURR_CPU); +} + +static ALWAYS_INLINE void arch_set_cpu_id_elx(void) +{ + write_tpidr_el2(read_tpidrro_el0()); +} +#else /* Note: keep in sync with `get_cpu` in arch/arm64/core/macro_priv.inc */ static ALWAYS_INLINE _cpu_t *arch_curr_cpu(void) { return (_cpu_t *)(read_tpidrro_el0() & TPIDRROEL0_CURR_CPU); } +static ALWAYS_INLINE void arch_set_cpu_id_elx(void) +{ + /* Do nothing */ +} +#endif static ALWAYS_INLINE int arch_exception_depth(void) { diff --git a/include/zephyr/arch/arm64/cpu.h b/include/zephyr/arch/arm64/cpu.h index 076f2d9b0257ee..9be84b48d458d9 100644 --- a/include/zephyr/arch/arm64/cpu.h +++ b/include/zephyr/arch/arm64/cpu.h @@ -54,7 +54,10 @@ #define SCTLR_I_BIT BIT(12) #define SCTLR_BR_BIT BIT(17) +#define CPACR_EL1_ZEN (0x3 << 16) #define CPACR_EL1_FPEN_NOTRAP (0x3 << 20) +#define CPACR_EL1_TTA BIT(28) +#define CPTR_EL2_TAM BIT(30) #define SCR_NS_BIT BIT(0) #define SCR_IRQ_BIT BIT(1) @@ -100,13 +103,23 @@ #define ESR_ISS_MASK BIT_MASK(25) #define ESR_IL_SHIFT (25) #define ESR_IL_MASK BIT_MASK(1) +#define ESR_ISS_CV_SHIFT (24) +#define ESR_ISS_CV_MASK BIT_MASK(1) +#define ESR_ISS_COND_SHIFT (15) +#define ESR_ISS_COND_MASK BIT_MASK(4) #define GET_ESR_EC(esr) (((esr) >> ESR_EC_SHIFT) & ESR_EC_MASK) #define GET_ESR_IL(esr) (((esr) >> ESR_IL_SHIFT) & ESR_IL_MASK) #define GET_ESR_ISS(esr) (((esr) >> 
ESR_ISS_SHIFT) & ESR_ISS_MASK) +#define GET_ESR_ISS_COND(esr) (((esr) >> ESR_ISS_COND_SHIFT) & ESR_ISS_COND_MASK) #define CNTV_CTL_ENABLE_BIT BIT(0) #define CNTV_CTL_IMASK_BIT BIT(1) +#define CNTV_CTL_ISTAT_BIT BIT(2) + +#define CNTP_CTL_ENABLE_BIT BIT(0) +#define CNTP_CTL_IMASK_BIT BIT(1) +#define CNTP_CTL_ISTAT_BIT BIT(2) #define ID_AA64PFR0_EL0_SHIFT (0) #define ID_AA64PFR0_EL1_SHIFT (4) @@ -134,11 +147,52 @@ #define CPTR_EL2_RES1 BIT(13) | BIT(12) | BIT(9) | (0xff) +#define HCR_VM_BIT BIT(0) +#define HCR_SWIO_BIT BIT(1) +#define HCR_PTW_BIT BIT(2) #define HCR_FMO_BIT BIT(3) #define HCR_IMO_BIT BIT(4) #define HCR_AMO_BIT BIT(5) +#define HCR_VF_BIT BIT(6) +#define HCR_VI_BIT BIT(7) +#define HCR_VSE_BIT BIT(8) +#define HCR_FB_BIT BIT(9) +#define HCR_BSU_IS_BIT BIT(10) +#define HCR_BSU_BIT (3 << 10) +#define HCR_DC_BIT BIT(12) +#define HCR_TWI_BIT BIT(13) +#define HCR_TWE_BIT BIT(14) +#define HCR_TID0_BIT BIT(15) +#define HCR_TID1_BIT BIT(16) +#define HCR_TID2_BIT BIT(17) +#define HCR_TID3_BIT BIT(18) +#define HCR_TSC_BIT BIT(19) +#define HCR_TIDCP_BIT BIT(20) +#define HCR_TAC_BIT BIT(21) +#define HCR_TSW_BIT BIT(22) +#define HCR_TPC_BIT BIT(23) +#define HCR_TPU_BIT BIT(24) +#define HCR_TTLB_BIT BIT(25) +#define HCR_TVM_BIT BIT(26) #define HCR_TGE_BIT BIT(27) +#define HCR_TDZ_BIT BIT(28) +#define HCR_HCDV_BIT BIT(29) +#define HCR_TRVM_BIT BIT(30) #define HCR_RW_BIT BIT(31) +#define HCR_CD_BIT BIT(32) +#define HCR_ID_BIT BIT(33) +#define HCR_E2H_BIT BIT(34) +#define HCR_TLOR_BIT BIT(35) +#define HCR_TERR_BIT BIT(36) +#define HCR_TEA_BIT BIT(37) +#define HCR_APK_BIT BIT(40) +#define HCR_API_BIT BIT(41) +#define HCR_FWB_BIT BIT(46) +#define HCR_FIEN_BIT BIT(47) +#define HCR_AMVOFFEN_BIT BIT(51) +#define HCR_ATA_BIT BIT(56) +#define HCR_DCT_BIT BIT(57) +#define HCR_TID5_BIT BIT(58) /* System register interface to GICv3 */ #define ICC_IGRPEN1_EL1 S3_0_C12_C12_7 @@ -159,6 +213,7 @@ #define ICC_EOIR0_EL1 S3_0_C12_C8_1 #define ICC_EOIR1_EL1 S3_0_C12_C12_1 #define ICC_SGI0R_EL1 S3_0_C12_C11_7 +#define ICC_DIR_EL1 S3_0_C12_C11_1 /* register constants */ #define ICC_SRE_ELx_SRE_BIT BIT(0) diff --git a/include/zephyr/arch/arm64/lib_helpers.h b/include/zephyr/arch/arm64/lib_helpers.h index 0f3d9d563abe24..2a979984de63c8 100644 --- a/include/zephyr/arch/arm64/lib_helpers.h +++ b/include/zephyr/arch/arm64/lib_helpers.h @@ -78,6 +78,27 @@ MAKE_REG_HELPER(tpidrro_el0); MAKE_REG_HELPER(vmpidr_el2); MAKE_REG_HELPER(sp_el0); +MAKE_REG_HELPER(cntp_ctl_el0); +MAKE_REG_HELPER(cntp_cval_el0) +MAKE_REG_HELPER(cntpct_el0); +MAKE_REG_HELPER(hstr_el2); +MAKE_REG_HELPER(id_aa64pfr1_el1); +MAKE_REG_HELPER(id_aa64mmfr1_el1); +MAKE_REG_HELPER(lorc_el1); +MAKE_REG_HELPER(mdscr_el1); +MAKE_REG_HELPER(midr_el1); +MAKE_REG_HELPER(mdcr_el2); +MAKE_REG_HELPER(pmcr_el0); +MAKE_REG_HELPER(sp_el1); +MAKE_REG_HELPER(tpidr_el0); +MAKE_REG_HELPER(tpidr_el1); +MAKE_REG_HELPER(tpidr_el2); +MAKE_REG_HELPER(vdisr_el2); +MAKE_REG_HELPER(vpidr_el2); +MAKE_REG_HELPER(vttbr_el2); +MAKE_REG_HELPER(vtcr_el2); +MAKE_REG_HELPER(isr_el1); + MAKE_REG_HELPER_EL123(actlr) MAKE_REG_HELPER_EL123(cpacr) MAKE_REG_HELPER_EL123(cptr) @@ -89,7 +110,9 @@ MAKE_REG_HELPER_EL123(sctlr) MAKE_REG_HELPER_EL123(spsr) MAKE_REG_HELPER_EL123(tcr) MAKE_REG_HELPER_EL123(ttbr0) +MAKE_REG_HELPER_EL123(ttbr1) MAKE_REG_HELPER_EL123(vbar) +MAKE_REG_HELPER_EL123(hpfar) #if defined(CONFIG_ARM_MPU) /* Armv8-R aarch64 mpu registers */ @@ -104,6 +127,69 @@ MAKE_REG_HELPER(prbar_el1); MAKE_REG_HELPER(prlar_el1); #endif +#if defined(CONFIG_GIC_V3) +#define eisr_el2 s3_4_c12_c11_3 
+#define elrsr_el2 s3_4_c12_c11_5 + +MAKE_REG_HELPER(eisr_el2); +MAKE_REG_HELPER(elrsr_el2); +#endif /* CONFIG_GIC_V3 */ + +/* Armv8.1+ VHE register */ +#if defined(CONFIG_HAS_ARM_VHE) +#define sctlr_el12 s3_5_c1_c0_0 +#define trfcr_el12 s3_5_c1_c0_1 +#define cpacr_el12 s3_5_c1_c0_2 +#define zcr_el12 s3_5_c1_c2_0 +#define ttbr0_el12 s3_5_c2_c0_0 +#define ttbr1_el12 s3_5_c2_c0_1 +#define tcr_el12 s3_5_c2_c0_2 +#define afsr0_el12 s3_5_c5_c1_0 +#define afsr1_el12 s3_5_c5_c1_1 +#define esr_el12 s3_5_c5_c2_0 +#define far_el12 s3_5_c6_c0_0 +#define pmscr_el12 s3_5_c9_c9_0 +#define mair_el12 s3_5_c10_c2_0 +#define amair_el12 s3_5_c10_c3_0 +#define vbar_el12 s3_5_c12_c0_0 +#define contextidr_el12 s3_5_c13_c0_1 +#define spsr_el12 s3_5_c4_c0_0 +#define elr_el12 s3_5_c4_c0_1 +#define cntkctl_el12 s3_5_c14_c1_0 +#define cntp_tval_el02 s3_5_c14_c2_0 +#define cntp_ctl_el02 s3_5_c14_c2_1 +#define cntp_cval_el02 s3_5_c14_c2_2 +#define cntv_tval_el02 s3_5_c14_c3_0 +#define cntv_ctl_el02 s3_5_c14_c3_1 +#define cntv_cval_el02 s3_5_c14_c3_2 + +MAKE_REG_HELPER(sctlr_el12); +MAKE_REG_HELPER(trfcr_el12); +MAKE_REG_HELPER(cpacr_el12); +MAKE_REG_HELPER(zcr_el12); +MAKE_REG_HELPER(ttbr0_el12); +MAKE_REG_HELPER(ttbr1_el12); +MAKE_REG_HELPER(tcr_el12); +MAKE_REG_HELPER(afsr0_el12); +MAKE_REG_HELPER(afsr1_el12); +MAKE_REG_HELPER(esr_el12); +MAKE_REG_HELPER(far_el12); +MAKE_REG_HELPER(pmscr_el12); +MAKE_REG_HELPER(mair_el12); +MAKE_REG_HELPER(amair_el12); +MAKE_REG_HELPER(vbar_el12); +MAKE_REG_HELPER(contextidr_el12); +MAKE_REG_HELPER(spsr_el12); +MAKE_REG_HELPER(elr_el12); +MAKE_REG_HELPER(cntkctl_el12); +MAKE_REG_HELPER(cntp_tval_el02); +MAKE_REG_HELPER(cntp_ctl_el02); +MAKE_REG_HELPER(cntp_cval_el02); +MAKE_REG_HELPER(cntv_tval_el02); +MAKE_REG_HELPER(cntv_ctl_el02); +MAKE_REG_HELPER(cntv_cval_el02); +#endif /* CONFIG_HAS_ARM_VHE */ + static ALWAYS_INLINE void enable_debug_exceptions(void) { __asm__ volatile ("msr DAIFClr, %0" @@ -191,6 +277,11 @@ static inline bool is_el2_sec_supported(void) ID_AA64PFR0_SEL2_MASK) != 0U); } +static inline bool is_el2_vhe_supported(void) +{ + return MODE_EL2 == GET_EL(read_currentel()); +} + static inline bool is_in_secure_state(void) { /* We cannot read SCR_EL3 from EL2 or EL1 */ diff --git a/include/zephyr/arch/arm64/timer.h b/include/zephyr/arch/arm64/timer.h index d8abba1c0e4efb..09b7dacbf3fcde 100644 --- a/include/zephyr/arch/arm64/timer.h +++ b/include/zephyr/arch/arm64/timer.h @@ -19,21 +19,62 @@ extern "C" { #endif -#define ARM_ARCH_TIMER_IRQ ARM_TIMER_VIRTUAL_IRQ -#define ARM_ARCH_TIMER_PRIO ARM_TIMER_VIRTUAL_PRIO -#define ARM_ARCH_TIMER_FLAGS ARM_TIMER_VIRTUAL_FLAGS +#if defined(CONFIG_ZVM) && defined(CONFIG_HAS_ARM_VHE) +#define ARM_ARCH_TIMER_IRQ ARM_TIMER_HYP_IRQ +#define ARM_ARCH_TIMER_PRIO ARM_TIMER_HYP_PRIO +#define ARM_ARCH_TIMER_FLAGS ARM_TIMER_HYP_FLAGS -static ALWAYS_INLINE void arm_arch_timer_init(void) +#define ARM_ARCH_VIRT_VTIMER_IRQ ARM_TIMER_VIRTUAL_IRQ +#define ARM_ARCH_VIRT_VTIMER_PRIO ARM_TIMER_VIRTUAL_PRIO +#define ARM_ARCH_VIRT_VTIMER_FLAGS ARM_TIMER_VIRTUAL_FLAGS +#define ARM_ARCH_VIRT_PTIMER_IRQ ARM_TIMER_NON_SECURE_IRQ +#define ARM_ARCH_VIRT_PTIMER_PRIO ARM_TIMER_NON_SECURE_PRIO +#define ARM_ARCH_VIRT_PTIMER_FLAGS ARM_TIMER_NON_SECURE_FLAGS + +#define HOST_CYC_PER_TICK ((uint64_t)sys_clock_hw_cycles_per_sec() \ + / (uint64_t)CONFIG_SYS_CLOCK_TICKS_PER_SEC) + +static ALWAYS_INLINE void arm_arch_timer_set_compare(uint64_t val) { -#ifdef CONFIG_TIMER_READS_ITS_FREQUENCY_AT_RUNTIME - extern int z_clock_hw_cycles_per_sec; - uint64_t cntfrq_el0 = 
read_cntfrq_el0(); + write_cntp_cval_el0(val); +} - __ASSERT(cntfrq_el0 < INT_MAX, "cntfrq_el0 cannot fit in system 'int'"); - z_clock_hw_cycles_per_sec = (int) cntfrq_el0; -#endif +static ALWAYS_INLINE void arm_arch_timer_enable(unsigned char enable) +{ + uint64_t cntp_ctl; + + cntp_ctl = read_cntp_ctl_el0(); + if (enable) { + cntp_ctl |= CNTP_CTL_ENABLE_BIT; + } else { + cntp_ctl &= ~CNTP_CTL_ENABLE_BIT; + } + write_cntp_ctl_el0(cntp_ctl); } +static ALWAYS_INLINE void arm_arch_timer_set_irq_mask(bool mask) +{ + uint64_t cntp_ctl; + + cntp_ctl = read_cntp_ctl_el0(); + if (mask) { + cntp_ctl |= CNTP_CTL_IMASK_BIT; + } else { + cntp_ctl &= ~CNTP_CTL_IMASK_BIT; + } + write_cntp_ctl_el0(cntp_ctl); +} + +static ALWAYS_INLINE uint64_t arm_arch_timer_count(void) +{ + return read_cntpct_el0(); +} + +#else +#define ARM_ARCH_TIMER_IRQ ARM_TIMER_VIRTUAL_IRQ +#define ARM_ARCH_TIMER_PRIO ARM_TIMER_VIRTUAL_PRIO +#define ARM_ARCH_TIMER_FLAGS ARM_TIMER_VIRTUAL_FLAGS + static ALWAYS_INLINE void arm_arch_timer_set_compare(uint64_t val) { write_cntv_cval_el0(val); @@ -73,6 +114,18 @@ static ALWAYS_INLINE uint64_t arm_arch_timer_count(void) { return read_cntvct_el0(); } +#endif /* defined(CONFIG_ZVM) && defined(CONFIG_HAS_ARM_VHE) */ + +static ALWAYS_INLINE void arm_arch_timer_init(void) +{ +#ifdef CONFIG_TIMER_READS_ITS_FREQUENCY_AT_RUNTIME + extern int z_clock_hw_cycles_per_sec; + uint64_t cntfrq_el0 = read_cntfrq_el0(); + + __ASSERT(cntfrq_el0 < INT_MAX, "cntfrq_el0 cannot fit in system 'int'"); + z_clock_hw_cycles_per_sec = (int) cntfrq_el0; +#endif +} #ifdef __cplusplus } diff --git a/include/zephyr/drivers/interrupt_controller/gic.h b/include/zephyr/drivers/interrupt_controller/gic.h index f8d10ff85070f0..c958e6f4574a0c 100644 --- a/include/zephyr/drivers/interrupt_controller/gic.h +++ b/include/zephyr/drivers/interrupt_controller/gic.h @@ -48,6 +48,13 @@ */ #define GICD_IIDR (GIC_DIST_BASE + 0x8) +/* + * 0x010 Distributor Status Register + * v1 ICDSTATUSR + * v2/v3 GICD_STATUSR + */ +#define GICD_STATUSR (GIC_DIST_BASE + 0x10) + /* * 0x080 Interrupt Group Registers * v1 ICDISRn @@ -333,6 +340,16 @@ unsigned int arm_gic_get_active(void); */ void arm_gic_eoi(unsigned int irq); +#ifdef CONFIG_ZVM +/** + * @brief Deactive interrupt after eoi + * + * @param intid: interrupt ID + * @param no_deactive: no need deactive flag + */ +void arm_gic_eoi_deactive(unsigned int intid, bool no_deactive); +#endif /* CONFIG_ZVM */ + #ifdef CONFIG_SMP /** * @brief Initialize GIC of secondary cores diff --git a/include/zephyr/kernel/thread.h b/include/zephyr/kernel/thread.h index fd8e4c02f235e8..718102ce55830d 100644 --- a/include/zephyr/kernel/thread.h +++ b/include/zephyr/kernel/thread.h @@ -72,7 +72,7 @@ struct _thread_base { uint8_t user_options; /* thread state */ - uint8_t thread_state; + uint16_t thread_state; /* * scheduler lock count and thread priority @@ -371,9 +371,13 @@ struct k_thread { /** threads waiting in k_thread_suspend() */ _wait_q_t halt_queue; #endif /* CONFIG_SMP */ - + /* The point for vcpu struct here */ +#ifdef CONFIG_ZVM + void *vcpu_struct; +#endif /** arch-specifics: must always be at the end */ struct _thread_arch arch; + }; typedef struct k_thread _thread_t; diff --git a/include/zephyr/kernel_structs.h b/include/zephyr/kernel_structs.h index cf7daff9a6cf79..bab1d42a70db99 100644 --- a/include/zephyr/kernel_structs.h +++ b/include/zephyr/kernel_structs.h @@ -72,6 +72,11 @@ extern "C" { /* Thread is present in the ready queue */ #define _THREAD_QUEUED (BIT(7)) +#ifdef CONFIG_ZVM +/* vCPU 
thread need switch context? */ +#define _THREAD_VCPU_NO_SWITCH (BIT(8)) +#endif + /* end - states */ #ifdef CONFIG_STACK_SENTINEL diff --git a/include/zephyr/linker/common-rom.ld b/include/zephyr/linker/common-rom.ld index 409c3e8b8f3dff..af474c4e0407d4 100644 --- a/include/zephyr/linker/common-rom.ld +++ b/include/zephyr/linker/common-rom.ld @@ -17,3 +17,5 @@ #include #include + +#include diff --git a/include/zephyr/linker/common-rom/common-rom-hypervisor.ld b/include/zephyr/linker/common-rom/common-rom-hypervisor.ld new file mode 100644 index 00000000000000..828979b5acc318 --- /dev/null +++ b/include/zephyr/linker/common-rom/common-rom-hypervisor.ld @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include + +SECTION_DATA_PROLOGUE(virtual_device_instances_sections,,) +{ + __virtual_device_instances_start = .; + KEEP(*("._virtual_device_instance.*")); + __virtual_device_instances_end = .; +} GROUP_ROM_LINK_IN(RAMABLE_REGION, ROMABLE_REGION) \ No newline at end of file diff --git a/include/zephyr/zvm/arm/asm.h b/include/zephyr/zvm/arm/asm.h new file mode 100644 index 00000000000000..5a0c43d7a1883e --- /dev/null +++ b/include/zephyr/zvm/arm/asm.h @@ -0,0 +1,31 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_ASM_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_ASM_H_ + +#define ARM_VM_EXCEPTION_SYNC (0x01) +#define ARM_VM_EXCEPTION_IRQ (0x02) +#define ARM_VM_EXCEPTION_SERROR (0x03) +#define ARM_VM_EXCEPTION_IRQ_IN_SYNC (0x04) + +#define DFSC_FT_TRANS_L0 (0x04) +#define DFSC_FT_TRANS_L1 (0x05) +#define DFSC_FT_TRANS_L2 (0x06) +#define DFSC_FT_TRANS_L3 (0x07) +#define DFSC_FT_ACCESS_L0 (0x08) +#define DFSC_FT_ACCESS_L1 (0x09) +#define DFSC_FT_ACCESS_L2 (0x0A) +#define DFSC_FT_ACCESS_L3 (0x0B) +#define DFSC_FT_PERM_L0 (0x0C) +#define DFSC_FT_PERM_L1 (0x0D) +#define DFSC_FT_PERM_L2 (0x0E) +#define DFSC_FT_PERM_L3 (0x0F) + +#define ARM_VM_SERROR_PENDING(x) !!((x) & (1U << 31)) + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_ASM_H_ */ diff --git a/include/zephyr/zvm/arm/cpu.h b/include/zephyr/zvm/arm/cpu.h new file mode 100644 index 00000000000000..59aa77d39dc5f0 --- /dev/null +++ b/include/zephyr/zvm/arm/cpu.h @@ -0,0 +1,210 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_CPU_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_CPU_H_ + +#include +#include +#include +#include +#include + +#define HCR_VHE_FLAGS (HCR_RW_BIT | HCR_TGE_BIT | HCR_E2H_BIT) + /* Host os NVHE flag */ +#define HCR_NVHE_FLAGS (HCR_RW_BIT | HCR_API_BIT | HCR_APK_BIT | HCR_ATA_BIT) +/* Ignored bit: HCR_TVM, and ignore HCR_TSW to avoid cache DC trap */ +#define HCR_VM_FLAGS (0UL | HCR_VM_BIT | HCR_FB_BIT | HCR_AMO_BIT | \ + HCR_FMO_BIT | HCR_IMO_BIT | HCR_BSU_IS_BIT | HCR_TAC_BIT | HCR_E2H_BIT | \ + HCR_TIDCP_BIT | HCR_RW_BIT | HCR_PTW_BIT ) + +/* Hypervisor cpu interface related register */ +#define ICH_AP0R0_EL2 S3_4_C12_C8_0 +#define ICH_AP0R1_EL2 S3_4_C12_C8_1 +#define ICH_AP0R2_EL2 S3_4_C12_C8_2 +#define ICH_AP1R0_EL2 S3_4_C12_C9_0 +#define ICH_AP1R1_EL2 S3_4_C12_C9_1 +#define ICH_AP1R2_EL2 S3_4_C12_C9_2 +#define ICH_VSEIR_EL2 S3_4_C12_C9_4 +#define ICH_SRE_EL2 S3_4_C12_C9_5 +#define ICH_HCR_EL2 S3_4_C12_C11_0 +#define ICH_EISR_EL2 S3_4_C12_C11_3 +#define ICH_VTR_EL2 S3_4_C12_C11_1 +#define ICH_VMCR_EL2 S3_4_C12_C11_7 +#define ICH_LR0_EL2 S3_4_C12_C12_0 +#define ICH_LR1_EL2 S3_4_C12_C12_1 +#define ICH_LR2_EL2 S3_4_C12_C12_2 +#define ICH_LR3_EL2 S3_4_C12_C12_3 +#define ICH_LR4_EL2 S3_4_C12_C12_4 +#define ICH_LR5_EL2 S3_4_C12_C12_5 +#define ICH_LR6_EL2 S3_4_C12_C12_6 +#define ICH_LR7_EL2 S3_4_C12_C12_7 + +/* commom reg macro */ +#define __SYSREG_c0 0 +#define __SYSREG_c1 1 +#define __SYSREG_c2 2 +#define __SYSREG_c3 3 +#define __SYSREG_c4 4 +#define __SYSREG_c5 5 +#define __SYSREG_c6 6 +#define __SYSREG_c7 7 +#define __SYSREG_c8 8 +#define __SYSREG_c9 9 +#define __SYSREG_c10 10 +#define __SYSREG_c11 11 +#define __SYSREG_c12 12 +#define __SYSREG_c13 13 +#define __SYSREG_c14 14 +#define __SYSREG_c15 15 + +#define __SYSREG_0 0 +#define __SYSREG_1 1 +#define __SYSREG_2 2 +#define __SYSREG_3 3 +#define __SYSREG_4 4 +#define __SYSREG_5 5 +#define __SYSREG_6 6 +#define __SYSREG_7 7 + +/* ESR_ELX related register, May be mov to ../arm64/cpu.h */ +#define ESR_SYSINS_OP0_MASK (0x00300000) +#define ESR_SYSINS_OP0_SHIFT (20) +#define ESR_SYSINS_OP2_MASK (0x000e0000) +#define ESR_SYSINS_OP2_SHIFT (17) +#define ESR_SYSINS_OP1_MASK (0x0001c000) +#define ESR_SYSINS_OP1_SHIFT (14) +#define ESR_SYSINS_CRN_MASK (0x00003c00) +#define ESR_SYSINS_CRN_SHIFT (10) +#define ESR_SYSINS_RT_MASK (0x000003e0) +#define ESR_SYSINS_RT_SHIFT (5) +#define ESR_SYSINS_CRM_MASK (0x0000001e) +#define ESR_SYSINS_CRM_SHIFT (1) +#define ESR_SYSINS_REGS_MASK (ESR_SYSINS_OP0_MASK|ESR_SYSINS_OP2_MASK|\ + ESR_SYSINS_OP1_MASK|ESR_SYSINS_CRN_MASK|\ + ESR_SYSINS_CRM_MASK) + +#define ESR_SYSINS(op0, op1, crn, crm, op2) \ + (((__SYSREG_##op0) << ESR_SYSINS_OP0_SHIFT) | \ + ((__SYSREG_##op1) << ESR_SYSINS_OP1_SHIFT) | \ + ((__SYSREG_##crn) << ESR_SYSINS_CRN_SHIFT) | \ + ((__SYSREG_##crm) << ESR_SYSINS_CRM_SHIFT) | \ + ((__SYSREG_##op2) << ESR_SYSINS_OP2_SHIFT)) + +#define ESR_SYSINSREG_SGI1R_EL1 ESR_SYSINS(3,0,c12,c11,5) +#define ESR_SYSINSREG_ASGI1R_EL1 ESR_SYSINS(3,1,c12,c11,6) +#define ESR_SYSINSREG_SGI0R_EL1 ESR_SYSINS(3,2,c12,c11,7) +#define ESR_SYSINSERG_CTLR_EL1 ESR_SYSINS(3,0,c12,c12,4) +#define ESR_SYSINSREG_CNTPCT_EL0 ESR_SYSINS(3,3,c14,c0,0) +#define ESR_SYSINSREG_CNTP_TVAL_EL0 ESR_SYSINS(3,3,c14,c2,0) +#define ESR_SYSINSREG_CNTP_CTL_EL0 ESR_SYSINS(3,3,c14,c2,1) +#define ESR_SYSINSREG_CNTP_CVAL_EL0 ESR_SYSINS(3,3,c14,c2,2) + +enum { + VCPU_MPIDR_EL1, + VCPU_CSSELR_EL1, + VCPU_SCTLR_EL1, + 
VCPU_ACTLR_EL1, + VCPU_CPACR_EL1, + VCPU_TTBR0_EL1, + VCPU_TTBR1_EL1, + VCPU_TCR_EL1, + VCPU_ESR_EL1, + VCPU_AFSR0_EL1, + VCPU_AFSR1_EL1, + VCPU_FAR_EL1, + VCPU_MAIR_EL1, + VCPU_VBAR_EL1, + VCPU_CONTEXTIDR_EL1, + VCPU_TPIDR_EL0, + VCPU_TPIDRRO_EL0, + VCPU_TPIDR_EL1, + VCPU_AMAIR_EL1, + VCPU_CNTKCTL_EL1, + VCPU_PAR_EL1, + VCPU_MDSCR_EL1, + VCPU_DISR_EL1, + VCPU_ELR_EL1, + VCPU_SP_EL1, + VCPU_SPSR_EL1, + VCPU_VPIDR, + VCPU_SYS_REG_NUM +}; + +struct arch_commom_regs { + + struct _callee_saved callee_saved_regs; + struct arch_esf esf_handle_regs; + + uint64_t pc; + uint64_t pstate; + uint64_t lr; + +}; +typedef struct arch_commom_regs arch_commom_regs_t; + +struct zvm_vcpu_context { + struct arch_commom_regs regs; + struct z_vcpu *running_vcpu; + uint64_t sys_regs[VCPU_SYS_REG_NUM]; +}; +typedef struct zvm_vcpu_context zvm_vcpu_context_t; + +struct vcpu_fault_info { + uint64_t esr_el2; + uint64_t disr_el1; + uint64_t far_el2; + uint64_t hpfar_el2; +}; + +struct vcpu_arch { + struct zvm_vcpu_context ctxt; + struct zvm_vcpu_context host_ctxt; + + /* Don't run the guest on this vcpu */ + bool pause; + bool first_run_vcpu; + bool vcpu_sys_register_loaded; + + /* HYP configuration. */ + uint64_t hcr_el2; + + uint64_t host_mdcr_el2; + uint64_t guest_mdcr_el2; + + /* arm gic list register bitmap for recording used lr */ + uint64_t list_regs_map; + + /* Exception information. */ + struct vcpu_fault_info fault; + + struct virt_timer_context *vtimer_context; + void *virq_data; +}; +typedef struct vcpu_arch vcpu_arch_t; + +/* vector and hyp_vector function */ +extern void *_vector_table[]; +extern void _hyp_vector_table(void); + +uint64_t* find_index_reg(uint16_t index, arch_commom_regs_t *regs); + +void vcpu_sysreg_load(struct z_vcpu *vcpu); +void vcpu_sysreg_save(struct z_vcpu *vcpu); + +void arch_vcpu_context_load(struct z_vcpu *vcpu); +void arch_vcpu_context_save(struct z_vcpu *vcpu); + +void switch_to_guest_sysreg(struct z_vcpu *vcpu); +void switch_to_host_sysreg(struct z_vcpu *vcpu); + +int arch_vcpu_init(struct z_vcpu *vcpu); +int arch_vcpu_deinit(struct z_vcpu *vcpu); + +int zvm_arch_init(void *op); + +#endif /*ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_CPU_H_*/ diff --git a/include/zephyr/zvm/arm/mmu.h b/include/zephyr/zvm/arm/mmu.h new file mode 100644 index 00000000000000..9da492abb6cc7b --- /dev/null +++ b/include/zephyr/zvm/arm/mmu.h @@ -0,0 +1,126 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_MM_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_MM_H_ + +#include +#include +#include +#include +#include +#include +#include + +/** + * stage-2 Memory types supported through MAIR. + */ +#define MT_S2_TYPE_MASK 0xFU +#define MT_S2_TYPE(attr) (attr & MT_TYPE_MASK) +#define MT_S2_DEVICE_nGnRnE 0U +#define MT_S2_DEVICE_nGnRE 1U +#define MT_S2_DEVICE_GRE 2U +#define MT_S2_NORMAL_NC 3U +#define MT_S2_NORMAL 4U +#define MT_S2_NORMAL_WT 5U +#define MT_S2_NORMAL_WB 6U + +/* Reuse host's mair for configure */ +#define MEMORY_S2_ATTRIBUTES ((0x00 << (MT_S2_DEVICE_nGnRnE * 8)) | \ + (0x04 << (MT_S2_DEVICE_nGnRE * 8)) | \ + (0x0c << (MT_S2_DEVICE_GRE * 8)) | \ + (0x44 << (MT_S2_NORMAL_NC * 8)) | \ + (0xffUL << (MT_S2_NORMAL * 8)) | \ + (0xbbUL << (MT_S2_NORMAL_WT * 8))) + +/* More flags from user's perpective are supported using remaining bits + * of "attrs" field, i.e. 
attrs[31:4], underlying code will take care + * of setting PTE fields correctly. + */ +#define MT_S2_PERM_R_SHIFT 4U +#define MT_S2_PERM_W_SHIFT 5U +#define MT_S2_EXECUTE_SHIFT 6U +#define MT_S2_NOACCESS_SHIFT 7U +#define MT_S2_NR (0U << MT_S2_PERM_R_SHIFT) +#define MT_S2_R (1U << MT_S2_PERM_R_SHIFT) +#define MT_S2_NW (0U << MT_S2_PERM_W_SHIFT) +#define MT_S2_W (1U << MT_S2_PERM_W_SHIFT) +#define MT_S2_EXECUTE_NEVER (0U << MT_S2_EXECUTE_SHIFT) +#define MT_S2_EXECUTE (1U << MT_S2_EXECUTE_SHIFT) +#define MT_S2_ACCESS_ON (0U << MT_S2_NOACCESS_SHIFT) +#define MT_S2_ACCESS_OFF (1U << MT_S2_NOACCESS_SHIFT) +#define MT_S2_P_RW_U_RW_NXN (MT_S2_R | MT_S2_W | MT_S2_EXECUTE) +#define MT_S2_P_RW_U_RW_XN (MT_S2_R | MT_S2_W | MT_S2_EXECUTE_NEVER) + +#define MT_VM_NORMAL_MEM (MT_S2_P_RW_U_RW_NXN | MT_S2_NORMAL) +#define MT_VM_DEVICE_MEM (MT_S2_P_RW_U_RW_XN | MT_S2_DEVICE_GRE) + + +/* + * Block and Page descriptor attributes fields for stage-2 + */ +#define S2_PTE_BLOCK_DESC_MEMTYPE(x) (x << 2) +#define S2_PTE_BLOCK_DESC_I_DEV_CACHE (0ULL << 2) +#define S2_PTE_BLOCK_DESC_I_NO_CACHE (1ULL << 2) +#define S2_PTE_BLOCK_DESC_I_WT_CACHE (2ULL << 2) +#define S2_PTE_BLOCK_DESC_I_WB_CACHE (3ULL << 2) +#define S2_PTE_BLOCK_DESC_O_DEV_CACHE (0ULL << 4) +#define S2_PTE_BLOCK_DESC_O_NO_CACHE (1ULL << 4) +#define S2_PTE_BLOCK_DESC_O_WT_CACHE (2ULL << 4) +#define S2_PTE_BLOCK_DESC_O_WB_CACHE (3ULL << 4) +#define S2_PTE_BLOCK_DESC_AP_NO_RW (0ULL << 6) +#define S2_PTE_BLOCK_DESC_AP_RO (1ULL << 6) +#define S2_PTE_BLOCK_DESC_AP_WO (2ULL << 6) +#define S2_PTE_BLOCK_DESC_AP_RW (3ULL << 6) +#define S2_PTE_BLOCK_DESC_NON_SHARE (0ULL << 8) +#define S2_PTE_BLOCK_DESC_OUTER_SHARE (2ULL << 8) +#define S2_PTE_BLOCK_DESC_INNER_SHARE (3ULL << 8) +#define S2_PTE_BLOCK_DESC_AF (1ULL << 10) +#define S2_PTE_BLOCK_DESC_XS (0ULL << 11) +#define S2_PTE_BLOCK_DESC_NO_XN (0ULL << 53) +#define S2_PTE_BLOCK_DESC_P_XN (1ULL << 53) +#define S2_PTE_BLOCK_DESC_PU_XN (2ULL << 53) +#define S2_PTE_BLOCK_DESC_U_XN (3ULL << 53) + +/* aliged memeory size to page */ +#define ALIGN_TO_PAGE(size) (((size) + (CONFIG_MMU_PAGE_SIZE - 1)) & ~(CONFIG_MMU_PAGE_SIZE - 1)) + +/** + * @brief Mapping vpart to physical block address. + */ +int arch_mmap_vpart_to_block(uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs); +int arch_unmap_vpart_to_block(uintptr_t virt, size_t size); + +int arch_vm_dev_domain_unmap(uint64_t pbase, uint64_t vbase, uint64_t size, char *name, uint16_t vmid, struct arm_mmu_ptables *ptables); +int arch_vm_dev_domain_map(uint64_t pbase, uint64_t vbase, uint64_t size, char *name, uint16_t vmid, struct arm_mmu_ptables *ptables); + +/** + * @brief map vma to physical block address: + * this function aim to translate virt address to phys address by setting the + * hyp page table. + */ +int arch_mmap_vma_to_block(uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs); +int arch_unmap_vma_to_block(uintptr_t virt, size_t size); + +/** + * @brief Add a partition to the vm virtual memory domain. + */ +int arch_vm_mem_domain_partition_add(struct k_mem_domain *domain, + uint32_t partition_id, uintptr_t phys_start, uint32_t vmid); + +/** + * @brief remove a partition from the vm virtual memory domain. + */ +int arch_vm_mem_domain_partition_remove(struct k_mem_domain *domain, + uint32_t partition_id, uint32_t vmid); + +/** + * @brief Architecture-specific hook for vm domain initialization. 
+ */ +int arch_mem_domain_init(struct k_mem_domain *domain); + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_MM_H_ */ diff --git a/include/zephyr/zvm/arm/switch.h b/include/zephyr/zvm/arm/switch.h new file mode 100644 index 00000000000000..b371ad5b7a97c4 --- /dev/null +++ b/include/zephyr/zvm/arm/switch.h @@ -0,0 +1,98 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_SWITCH_H__ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_SWITCH_H__ + +#include +#include + +/* VM interrupt description MACRO */ +#define VM_BASE_VIRQ_NR (256) +#define VM_SGI_VIRQ_NR (16) +#define VM_PPI_VIRQ_NR (16) +#define VM_SPI_VIRQ_NR (VM_BASE_VIRQ_NR - VM_SGI_VIRQ_NR - VM_PPI_VIRQ_NR) + +#define ISS_SAS_8BIT (0x0) +#define ISS_SAS_16BIT (0x1) +#define ISS_SAS_32BIT (0x2) +#define ISS_SAS_64BIT (0x3) + +/* HPFAR_EL2 addr mask */ +#define HPFAR_EL2_MASK GENMASK(39,4) +#define HPFAR_EL2_SHIFT (4) +#define HPFAR_EL2_PAGE_MASK GENMASK(11,0) +#define HPFAR_EL2_PAGE_SHIFT (12) + +struct z_vcpu; + +struct esr_dabt_area { + uint64_t dfsc :6; /* Data Fault Status Code */ + uint64_t wnr :1; /* Write / not Read */ + uint64_t s1ptw :1; /* Stage 2 fault during stage 1 translation */ + uint64_t cm :1; /* Cache Maintenance */ + uint64_t ea :1; /* External Abort Type */ + uint64_t fnv :1; /* FAR not Valid */ + uint64_t set :2; /* Synchronous Error Type */ + uint64_t vncr :1; /* Indicates that the fault came from use of VNCR_EL2.*/ + uint64_t ar :1; /* Acquire Release */ + uint64_t sf :1; /* Sixty Four bit register */ + uint64_t srt :5; /* The Register which store the value */ + uint64_t sse :1; /* Sign extend */ + uint64_t sas :2; /* Syndrome Access Size */ + uint64_t isv :1; /* Syndrome Valid */ + uint64_t il :1; /* Instruction length */ + uint64_t ec :6; /* Exception Class */ + uint64_t ISS2 :5; /* FEAT_LS64 is Implemented */ + uint64_t res :27; /* RES0 */ +}; + +struct esr_sysreg_area { + uint64_t dire :1; /* Direction */ + uint64_t crm :4; /* CRm */ + uint64_t rt :5; /* Rt */ + uint64_t crn :4; /* CRn */ + uint64_t op1 :3; /* Op1 */ + uint64_t op2 :3; /* Op2 */ + uint64_t op0 :2; /* Op0 */ + uint64_t res0 :3; /* reserved file */ + uint64_t il :1; /* Instruction length */ + uint64_t ec :6; /* Exception Class */ +}; + +/** + * @brief sync handler for this vm. + */ +void* z_vm_lower_sync_handler(uint64_t esr_elx); + +/** + * @brief irq handler for this vm. + */ +void* z_vm_lower_irq_handler(struct arch_esf *esf_ctxt); + +/** + * @brief ready to run vcpu here, for prepare running guest code. + * This function aim to make preparetion before running guest os and restore + * the origin hardware state after guest exit. + */ +int arch_vcpu_run(struct z_vcpu *vcpu); + +/** + * @brief Avoid switch handle when current thread is a vcpu thread, + * and curretn irq is send to vcpu. + * @retval + * true: this irq is sent to vcpu. + * false: this irq is a normal irq. + */ +bool zvm_switch_handle_pre(uint32_t irq); + +/** + * @brief Get the zvm host context object for context switch + */ +uint64_t get_zvm_host_context(void); + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_SWITCH_H__ */ diff --git a/include/zephyr/zvm/arm/timer.h b/include/zephyr/zvm/arm/timer.h new file mode 100644 index 00000000000000..ce8e5dba862791 --- /dev/null +++ b/include/zephyr/zvm/arm/timer.h @@ -0,0 +1,65 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_VTIMER_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_VTIMER_H_ + +#include +#include +#include +#include <../../kernel/include/timeout_q.h> + +/** + * @brief Virtual timer context for this vcpu. + * Describes only two elements, one is a virtual timer, the other is physical. + */ +struct virt_timer_context { + /* virtual timer irq number */ + uint32_t virt_virq; + uint32_t virt_pirq; + /* control register */ + uint32_t cntv_ctl; + uint32_t cntp_ctl; + /* virtual count compare register */ + uint64_t cntv_cval; + uint64_t cntp_cval; + /* virtual count value register */ + uint64_t cntv_tval; + uint64_t cntp_tval; + /* timeout for softirq */ + struct _timeout vtimer_timeout; + struct _timeout ptimer_timeout; + /* vcpu timer offset value, value is cycle */ + uint64_t timer_offset; + void *vcpu; + bool enable_flag; +}; + +/** + * @brief Simulate cntp_tval_el0 register + */ +void simulate_timer_cntp_tval(struct z_vcpu *vcpu, int read, uint64_t *value); + +/** + * @brief Simulate cntp_cval_el0 register + */ +void simulate_timer_cntp_cval(struct z_vcpu *vcpu, int read, uint64_t *value); + +/** + * @brief Simulate cntp_ctl register + */ +void simulate_timer_cntp_ctl(struct z_vcpu *vcpu, int read, uint64_t *value); + +int arch_vcpu_timer_init(struct z_vcpu *vcpu); +int arch_vcpu_timer_deinit(struct z_vcpu *vcpu); + +/** + * @brief Init zvm arch timer. + */ +int zvm_arch_vtimer_init(void); + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_VTIMER_H_ */ diff --git a/include/zephyr/zvm/os.h b/include/zephyr/zvm/os.h new file mode 100644 index 00000000000000..624c31ef8e8456 --- /dev/null +++ b/include/zephyr/zvm/os.h @@ -0,0 +1,85 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_OS_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_OS_H_ + +#include +#include +#include +#include + +struct getopt_state; + +#define OS_NAME_LENGTH (32) +#define OS_TYPE_ZEPHYR (0) +#define OS_TYPE_LINUX (1) +#define OS_TYPE_OTHERS (2) +#define OS_TYPE_MAX (3) + +#define ZEPHYR_VM_LOAD_BASE DT_REG_ADDR(DT_NODELABEL(zephyr_ddr)) +#define ZEPHYR_VM_LOAD_SIZE DT_REG_SIZE(DT_NODELABEL(zephyr_ddr)) +#define ZEPHYR_VM_MEMORY_BASE DT_PROP(DT_NODELABEL(zephyr_ddr), vm_reg_base) +#define ZEPHYR_VM_MEMORY_SIZE DT_PROP(DT_NODELABEL(zephyr_ddr), vm_reg_size) +#define ZEPHYR_IMAGE_BASE DT_REG_ADDR(DT_ALIAS(zephyrcpy)) +#define ZEPHYR_IMAGE_SIZE DT_REG_SIZE(DT_ALIAS(zephyrcpy)) +#define ZEPHYR_VM_VCPU_NUM DT_PROP(DT_INST(0, zephyr_vm), vcpu_num) + +#define LINUX_VM_LOAD_BASE DT_REG_ADDR(DT_NODELABEL(linux_ddr)) +#define LINUX_VM_LOAD_SIZE DT_REG_SIZE(DT_NODELABEL(linux_ddr)) +#define LINUX_VM_MEMORY_BASE DT_PROP(DT_NODELABEL(linux_ddr), vm_reg_base) +#define LINUX_VM_MEMORY_SIZE DT_PROP(DT_NODELABEL(linux_ddr), vm_reg_size) +#define LINUX_IMAGE_BASE DT_REG_ADDR(DT_ALIAS(linuxcpy)) +#define LINUX_IMAGE_SIZE DT_REG_SIZE(DT_ALIAS(linuxcpy)) +#define LINUX_VMDTB_BASE DT_REG_ADDR(DT_ALIAS(linuxdtb)) +#define LINUX_VMDTB_SIZE DT_REG_SIZE(DT_ALIAS(linuxdtb)) +#define LINUX_VMRFS_BASE DT_REG_ADDR(DT_ALIAS(linuxrfs)) +#define LINUX_VMRFS_SIZE DT_REG_SIZE(DT_ALIAS(linuxrfs)) +#define LINUX_VMRFS_PHY_BASE DT_PROP(DT_INST(0, linux_vm), rootfs_address) +#define LINUX_VM_VCPU_NUM DT_PROP(DT_INST(0, linux_vm), vcpu_num) +#ifdef CONFIG_VM_DTB_FILE_INPUT +#define LINUX_DTB_MEM_BASE DT_PROP(DT_INST(0, linux_vm), dtb_address) +#define LINUX_DTB_MEM_SIZE DT_PROP(DT_INST(0, linux_vm), dtb_size) +#endif /* CONFIG_VM_DTB_FILE_INPUT */ + +/** + * @brief VM information structure in ZVM. + * + * @param os_type: the type of the operating system. + * @param entry_point: the entry point of the vm, when + * boot from elf file, this is not equal to vm_mem_base. + * @param vcpu_num: the number of virtual CPUs. + * @param vm_mem_base: the base address of the vm memory. + * @param vm_mem_size: the size of the vm memory. + * @param vm_image_base: the base address of the vm image in disk. + * @param vm_image_size: the size of the vm image in disk. + */ +struct z_os_info { + uint16_t os_type; + uint16_t vcpu_num; + uint32_t vm_mem_base; + uint32_t vm_mem_size; + uint32_t vm_load_base; + uint64_t vm_image_base; + uint64_t vm_image_size; + uint64_t entry_point; +}; + +struct z_os { + char *name; + bool is_rtos; + struct z_os_info info; +}; + + +int get_os_info_by_type(struct z_os_info *vm_info); + +int load_vm_image(struct vm_mem_domain *vmem_domain, struct z_os *os); + +int vm_os_create(struct z_os* os, struct z_os_info *vm_info); + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_OS_H_ */ diff --git a/include/zephyr/zvm/vdev/pt_device.h b/include/zephyr/zvm/vdev/pt_device.h new file mode 100644 index 00000000000000..fee7c9d9822b8a --- /dev/null +++ b/include/zephyr/zvm/vdev/pt_device.h @@ -0,0 +1,30 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Charlie, Xingyu Hu and etc. 
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_ZVM_PT_DEVICE_H_ +#define ZEPHYR_INCLUDE_ZVM_PT_DEVICE_H_ + +typedef void (*ptdevice_irq_config_func_t)(const struct device *dev); +typedef void (*ptdevice_special_func_t)(const void *user_data); + +/* pass-through device description. */ +struct pass_through_device_data { + struct z_virt_dev *vdev; +}; + +struct pass_through_device_config { + /*special init function.*/ + ptdevice_special_func_t ptdev_spec_init_func; + + /*special irq function.*/ + ptdevice_special_func_t ptdev_spec_irq_func; + + /*irq configuration function.*/ + ptdevice_irq_config_func_t irq_config_func; +}; + +#endif /* ZEPHYR_INCLUDE_ZVM_PT_DEVICE_H_ */ \ No newline at end of file diff --git a/include/zephyr/zvm/vdev/vgic_common.h b/include/zephyr/zvm/vdev/vgic_common.h new file mode 100644 index 00000000000000..e1048e5a0d40a2 --- /dev/null +++ b/include/zephyr/zvm/vdev/vgic_common.h @@ -0,0 +1,282 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_VGIC_COMMON_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_VGIC_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include + +struct z_virt_dev; + +#define VGIC_CONTROL_BLOCK_ID vgic_control_block +#define VGIC_CONTROL_BLOCK_NAME vm_irq_control_block + +/* GIC version */ +#define VGIC_V2 BIT(0) +#define VGIC_V3 BIT(8) + +/* GIC dev type */ +#define TYPE_GIC_GICD BIT(0) +#define TYPE_GIC_GICR_RD BIT(1) +#define TYPE_GIC_GICR_SGI BIT(2) +#define TYPE_GIC_GICR_VLPI BIT(3) +#define TYPE_GIC_INVAILD (0xFF) + +/* GIC device macro here */ +#define VGIC_DIST_BASE DT_REG_ADDR_BY_IDX(DT_INST(0, arm_gic), 0) +#define VGIC_DIST_SIZE DT_REG_SIZE_BY_IDX(DT_INST(0, arm_gic), 0) +#define VGIC_RDIST_BASE DT_REG_ADDR_BY_IDX(DT_INST(0, arm_gic), 1) +#define VGIC_RDIST_SIZE DT_REG_SIZE_BY_IDX(DT_INST(0, arm_gic), 1) + +/* GICD registers offset from DIST_base(n) */ +#define VGICD_CTLR (GICD_CTLR-GIC_DIST_BASE) +#define VGICD_TYPER (GICD_TYPER-GIC_DIST_BASE) +#define VGICD_IIDR (GICD_IIDR-GIC_DIST_BASE) +#define VGICD_STATUSR (GICD_STATUSR-GIC_DIST_BASE) +#define VGICD_ISENABLERn (GICD_ISENABLERn-GIC_DIST_BASE) +#define VGICD_ICENABLERn (GICD_ICENABLERn-GIC_DIST_BASE) +#define VGICD_ISPENDRn (GICD_ISPENDRn-GIC_DIST_BASE) +#define VGICD_ICPENDRn (GICD_ICPENDRn-GIC_DIST_BASE) + +#define VGIC_RESERVED 0x0F30 +#define VGIC_INMIRn 0x0f80 +#define VGICD_PIDR2 0xFFE8 + +/* Vgic control block flag */ +#define VIRQ_HW_SUPPORT BIT(1) + +#define VGIC_VIRQ_IN_SGI (0x0) +#define VGIC_VIRQ_IN_PPI (0x1) +/* Sorting virt irq to SGI/PPI/SPI */ +#define VGIC_VIRQ_LEVEL_SORT(irq) ((irq)/VM_SGI_VIRQ_NR) + +/* VGIC Type for virtual interrupt control */ +#define VGIC_TYPER_REGISTER (read_sysreg(ICH_VTR_EL2)) +#define VGIC_TYPER_LR_NUM ((VGIC_TYPER_REGISTER & 0x1F) + 1) +#define VGIC_TYPER_PRIO_NUM (((VGIC_TYPER_REGISTER >> 29) & 0x07) + 1) + +/* 64k frame */ +#define VGIC_RD_BASE_SIZE (64 * 1024) +#define VGIC_SGI_BASE_SIZE (64 * 1024) +#define VGIC_RD_SGI_SIZE (VGIC_RD_BASE_SIZE+VGIC_SGI_BASE_SIZE) + +/* virtual gic device register operation */ +#define vgic_sysreg_read32(base, offset) sys_read32((long unsigned int)(base+((offset)/4))) +#define vgic_sysreg_write32(data, base, offset) sys_write32(data, (long unsigned int)(base+((offset)/4))) +#define vgic_sysreg_read64(base, offset) sys_read64((long unsigned 
int)(base+((offset)/4)))
+#define vgic_sysreg_write64(data, base, offset) sys_write64(data, (long unsigned int)(base+((offset)/4)))
+
+#define DEFAULT_DISABLE_IRQVAL (0xFFFFFFFF)
+
+/**
+ * @brief Virtual generic interrupt controller distributor
+ * struct for each vm.
+ */
+struct virt_gic_gicd {
+	/**
+	 * GICD base address and size, used to
+	 * locate vdev accesses from the vm.
+	 */
+	uint32_t gicd_base;
+	uint32_t gicd_size;
+	/* backing store for the virtual gicd registers emulated for the vm. */
+	uint32_t *gicd_regs_base;
+
+	/* gicd spin lock */
+	struct k_spinlock gicd_lock;
+};
+
+typedef int (*vgic_gicd_read_32_t)(const struct device *dev, struct z_vcpu *vcpu,
+		uint32_t offset, uint32_t *value);
+
+typedef int (*vgic_gicrrd_read_32_t)(const struct device *dev, struct z_vcpu *vcpu,
+		uint32_t offset, uint32_t *value);
+
+typedef int (*vgic_gicd_write_32_t)(const struct device *dev, struct z_vcpu *vcpu,
+		uint32_t offset, uint32_t *value);
+
+typedef int (*vgic_gicrrd_write_32_t)(const struct device *dev, struct z_vcpu *vcpu,
+		uint32_t offset, uint32_t *value);
+
+__subsystem struct vgic_common_api {
+	vgic_gicd_read_32_t vgicd_read_32;
+	vgic_gicd_write_32_t vgicd_write_32;
+
+	vgic_gicrrd_read_32_t vgicr_rd_read_32;
+	vgic_gicrrd_write_32_t vgicr_write_32;
+};
+
+typedef int (*vm_irq_exit_t)(struct device *dev, struct z_vcpu *vcpu, void *data);
+
+typedef int (*vm_irq_enter_t)(struct device *dev, struct z_vcpu *vcpu, void *data);
+
+__subsystem struct vm_irq_handler_api {
+	vm_irq_exit_t irq_exit_from_vm;
+	vm_irq_enter_t irq_enter_to_vm;
+};
+
+uint32_t *arm_gic_get_distbase(struct z_virt_dev *vdev);
+
+void z_ready_thread(struct k_thread *thread);
+
+/**
+ * @brief Enable the irqs monopolized by a vcpu.
+ */
+void arch_vdev_irq_enable(struct z_vcpu *vcpu);
+
+/**
+ * @brief Disable the irqs monopolized by a vcpu.
+ */
+void arch_vdev_irq_disable(struct z_vcpu *vcpu);
+
+int vgic_vdev_mem_read(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size);
+int vgic_vdev_mem_write(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size);
+
+/**
+ * @brief Set/unset a virt irq signal to a vcpu.
+ */
+int set_virq_to_vcpu(struct z_vcpu *vcpu, uint32_t virq_num);
+
+/**
+ * @brief Set/unset a virt irq to a vm.
+ */
+int set_virq_to_vm(struct z_vm *vm, uint32_t virq_num);
+int unset_virq_to_vm(struct z_vm *vm, uint32_t virq_num);
+
+int virt_irq_sync_vgic(struct z_vcpu *vcpu);
+int virt_irq_flush_vgic(struct z_vcpu *vcpu);
+
+/**
+ * @brief Get the virq desc object.
+ */
+struct virt_irq_desc *get_virt_irq_desc(struct z_vcpu *vcpu, uint32_t virq);
+
+/**
+ * @brief When a vcpu is looping in idle mode, we must send a virq
+ * to activate it.
+ */
+static ALWAYS_INLINE void wakeup_target_vcpu(struct z_vcpu *vcpu, struct virt_irq_desc *desc)
+{
+	ARG_UNUSED(desc);
+	/* Put the thread into the run queue */
+	z_ready_thread(vcpu->work->vcpu_thread);
+}
+
+/**
+ * @brief Check whether this vm can receive a virq.
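+ *
+ * An illustrative check before injection (a sketch; the virq_flags value
+ * comes from the irq descriptor, see struct virt_irq_desc):
+ * @code{.c}
+ * if (is_vm_irq_valid(vcpu->vm, desc->virq_flags)) {
+ *     set_virq_to_vcpu(vcpu, desc->virq_num);
+ * }
+ * @endcode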
+ */
+static ALWAYS_INLINE bool is_vm_irq_valid(struct z_vm *vm, uint32_t flag)
+{
+	if (vm->vm_status == VM_STATE_NEVER_RUN) {
+		return false;
+	}
+
+	if (vm->vm_status == VM_STATE_PAUSE) {
+		/* Only wakeup-capable virqs may reach a paused vm. */
+		return (flag & VIRQ_WAKEUP_FLAG) != 0;
+	}
+	return true;
+}
+
+static ALWAYS_INLINE struct virt_irq_desc *vgic_get_virt_irq_desc(struct z_vcpu *vcpu, uint32_t virq)
+{
+	struct z_vm *vm = vcpu->vm;
+
+	/* sgi virq num */
+	if (virq < VM_LOCAL_VIRQ_NR) {
+		return &vcpu->virq_block.vcpu_virt_irq_desc[virq];
+	}
+
+	/* spi virq num */
+	if ((virq >= VM_LOCAL_VIRQ_NR) && (virq < VM_GLOBAL_VIRQ_NR)) {
+		return &vm->vm_irq_block.vm_virt_irq_desc[virq - VM_LOCAL_VIRQ_NR];
+	}
+
+	return NULL;
+}
+
+static ALWAYS_INLINE int vgic_irq_enable(struct z_vcpu *vcpu, uint32_t virt_irq)
+{
+	struct virt_irq_desc *desc;
+
+	desc = vgic_get_virt_irq_desc(vcpu, virt_irq);
+	if (!desc) {
+		return -ENOENT;
+	}
+	desc->virq_flags |= VIRQ_ENABLED_FLAG;
+	if (virt_irq > VM_LOCAL_VIRQ_NR) {
+		/* TODO: How to route a virtual device's irq to a vcpu. */
+		if (desc->virq_flags & VIRQ_HW_FLAG && vcpu->vcpu_id == 0) {
+			if (desc->pirq_num > VM_LOCAL_VIRQ_NR) {
+				irq_enable(desc->pirq_num);
+			} else {
+				return -ENODEV;
+			}
+		}
+	} else {
+		if (desc->virq_flags & VIRQ_HW_FLAG) {
+			irq_enable(virt_irq);
+		}
+	}
+	return 0;
+}
+
+static ALWAYS_INLINE int vgic_irq_disable(struct z_vcpu *vcpu, uint32_t virt_irq)
+{
+	struct virt_irq_desc *desc;
+
+	desc = vgic_get_virt_irq_desc(vcpu, virt_irq);
+	if (!desc) {
+		return -ENOENT;
+	}
+	desc->virq_flags &= ~VIRQ_ENABLED_FLAG;
+	if (virt_irq > VM_LOCAL_VIRQ_NR) {
+		if (desc->virq_flags & VIRQ_HW_FLAG && vcpu->vcpu_id == 0) {
+			if (desc->pirq_num > VM_LOCAL_VIRQ_NR) {
+				irq_disable(desc->pirq_num);
+			} else {
+				return -ENODEV;
+			}
+		}
+	} else {
+		if (desc->virq_flags & VIRQ_HW_FLAG) {
+			irq_disable(virt_irq);
+		}
+	}
+	return 0;
+}
+
+static ALWAYS_INLINE bool vgic_irq_test_bit(struct z_vcpu *vcpu, uint32_t spi_nr_count,
+		uint32_t *value, uint32_t bit_size, bool enable)
+{
+	ARG_UNUSED(enable);
+	ARG_UNUSED(spi_nr_count);
+	int bit;
+	uintptr_t reg_mem_addr = (uintptr_t)value;
+	for (bit=0; bit
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../../../drivers/interrupt_controller/intc_gicv3_priv.h"
+
+/* SGI mode */
+#define SGI_SIG_TO_LIST (0)
+#define SGI_SIG_TO_OTHERS (1)
+
+/* vgic macros */
+#define VGIC_MAX_VCPU 64
+#define VGIC_UNDEFINE_ADDR 0xFFFFFFFF
+
+/* vgic action */
+#define ACTION_CLEAR_VIRQ BIT(0)
+#define ACTION_SET_VIRQ BIT(1)
+
+/* GIC control value */
+#define GICH_VMCR_VENG0 (1 << 0)
+#define GICH_VMCR_VENG1 (1 << 1)
+#define GICH_VMCR_VACKCTL (1 << 2)
+#define GICH_VMCR_VFIQEN (1 << 3)
+#define GICH_VMCR_VCBPR (1 << 4)
+#define GICH_VMCR_VEOIM (1 << 9)
+#define GICH_VMCR_DEFAULT_MASK (0xf8 << 24)
+
+#define GICH_HCR_EN (1 << 0)
+#define GICH_HCR_UIE (1 << 1)
+#define GICH_HCR_LRENPIE (1 << 2)
+#define GICH_HCR_NPIE (1 << 3)
+#define GICH_HCR_TALL1 (1 << 12)
+
+/* list register */
+#define LIST_REG_GTOUP0 (0)
+#define LIST_REG_GROUP1 (1)
+#define LIST_REG_NHW_VIRQ (0)
+#define LIST_REG_HW_VIRQ (1)
+
+/* GICR registers offset from RDIST_base(n) */
+#define VGICR_CTLR GICR_CTLR
+#define VGICR_IIDR GICR_IIDR
+#define VGICR_TYPER GICR_TYPER
+#define VGICR_STATUSR GICR_STATUSR
+#define VGICR_WAKER GICR_WAKER
+#define VGICR_PROPBASER GICR_PROPBASER
+#define VGICR_PENDBASER GICR_PENDBASER
+#define VGICR_ISENABLER0 0x0100
+#define VGICR_ICENABLER0 0x0180
+#define VGICR_SGI_PENDING 0x0200
+#define VGICR_SGI_ICPENDING 0x0280
+#define VGICR_PIDR2 0xFFE8
+
+/* list register test and set */
+#define VGIC_LIST_REGS_TEST(id, vcpu) \
+	((((struct z_vcpu *)vcpu)->arch->list_regs_map) \
+	& (1 << id))
+#define VGIC_LIST_REGS_UNSET(id, vcpu) ((((struct z_vcpu *)vcpu)->arch->list_regs_map) \
+	= ((((struct z_vcpu *)vcpu)->arch->list_regs_map) \
+	& (~(1 << id))))
+#define VGIC_LIST_REGS_SET(id, vcpu) ((((struct z_vcpu *)vcpu)->arch->list_regs_map) \
+	= ((((struct z_vcpu *)vcpu)->arch->list_regs_map) \
+	| (1 << id)))
+#define VGIC_ELRSR_REG_TEST(id, elrsr) ((1 << ((id)&0x1F)) & elrsr)
+
+/**
+ * @brief vcpu vgicv3 register interface.
+ */
+struct gicv3_vcpuif_ctxt {
+	uint64_t ich_lr0_el2;
+	uint64_t ich_lr1_el2;
+	uint64_t ich_lr2_el2;
+	uint64_t ich_lr3_el2;
+	uint64_t ich_lr4_el2;
+	uint64_t ich_lr5_el2;
+	uint64_t ich_lr6_el2;
+	uint64_t ich_lr7_el2;
+
+	uint32_t ich_ap0r2_el2;
+	uint32_t ich_ap1r2_el2;
+	uint32_t ich_ap0r1_el2;
+	uint32_t ich_ap1r1_el2;
+	uint32_t ich_ap0r0_el2;
+	uint32_t ich_ap1r0_el2;
+	uint32_t ich_vmcr_el2;
+	uint32_t ich_hcr_el2;
+
+	uint32_t icc_ctlr_el1;
+	uint32_t icc_sre_el1;
+	uint32_t icc_pmr_el1;
+};
+
+/**
+ * @brief gicv3_list_reg register bit field, which
+ * provides interrupt context information for the virtual
+ * CPU interface.
+ */
+struct gicv3_list_reg {
+	uint64_t vINTID : 32;
+	uint64_t pINTID : 13;
+	uint64_t res0 : 3;
+	uint64_t priority : 8;
+	uint64_t res1 : 3;
+	uint64_t nmi : 1;
+	uint64_t group : 1;
+	uint64_t hw : 1;
+	uint64_t state : 2;
+};
+
+/**
+ * @brief Virtual generic interrupt controller redistributor
+ * struct for each vm's vcpu.
+ * Each Redistributor defines four 64KB frames as follows:
+ * 1. RD_base
+ * 2. SGI_base
+ * 3. VLPI_base
+ * 4. Reserved
+ * TODO: support vlpi later.
+ */
+struct virt_gic_gicr {
+	uint32_t vcpu_id;
+
+	/* backing store for the virtual gicr registers emulated for the vm. */
+	uint32_t *gicr_rd_reg_base;
+	uint32_t *gicr_sgi_reg_base;
+
+	/**
+	 * GICR base addresses and sizes, used to
+	 * locate vdev accesses from the vm.
+	 */
+	uint32_t gicr_rd_base;
+	uint32_t gicr_sgi_base;
+	uint32_t gicr_rd_size;
+	uint32_t gicr_sgi_size;
+
+	struct k_spinlock gicr_lock;
+};
+
+/**
+ * @brief vgicv3 virtual device struct, for emulating the device.
+ */
+struct vgicv3_dev {
+	struct virt_gic_gicd gicd;
+	struct virt_gic_gicr *gicr[VGIC_RDIST_SIZE/VGIC_RD_SGI_SIZE];
+};
+
+/**
+ * @brief virtual gicv3 device information.
+ */
+struct gicv3_vdevice {
+	uint64_t gicd_base;
+	uint64_t gicd_size;
+	uint64_t gicr_base;
+	uint64_t gicr_size;
+};
+
+/**
+ * @brief gic vcpu interface init.
+ */
+int vcpu_gicv3_init(struct gicv3_vcpuif_ctxt *ctxt);
+
+/**
+ * @brief Check if a virtual interrupt is pending.
+ */
+bool virt_irq_ispending(struct z_vcpu *vcpu);
+
+/**
+ * @brief Before entering the vm, load the vcpu interrupt state.
+ */
+int vgicv3_state_load(struct z_vcpu *vcpu, struct gicv3_vcpuif_ctxt *ctxt);
+
+/**
+ * @brief Before exiting from the vm, store the vcpu interrupt state.
+ */
+int vgicv3_state_save(struct z_vcpu *vcpu, struct gicv3_vcpuif_ctxt *ctxt);
+
+/**
+ * @brief Send (inject) a virq to the vm at EL1.
+ */
+int gicv3_inject_virq(struct z_vcpu *vcpu, struct virt_irq_desc *desc);
+
+/**
+ * @brief GIC redistributor SGI frame vdev mem read.
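+ *
+ * A hedged sketch of the vdev MMIO dispatch that reaches this function
+ * (offset/type resolution via get_vcpu_gicr_type(), declared below):
+ * @code{.c}
+ * uint32_t type, offset;
+ * struct virt_gic_gicr *gicr = get_vcpu_gicr_type(vgic, addr, &type, &offset);
+ *
+ * if (gicr && type == TYPE_GIC_GICR_SGI) {
+ *     vgic_gicrsgi_mem_read(vcpu, gicr, offset, &value);
+ * }
+ * @endcode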
+ */
+int vgic_gicrsgi_mem_read(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr,
+		uint32_t offset, uint64_t *v);
+
+/**
+ * @brief GIC redistributor SGI frame vdev mem write.
+ */
+int vgic_gicrsgi_mem_write(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr,
+		uint32_t offset, uint64_t *v);
+
+/**
+ * @brief GIC redistributor RD frame vdev mem read.
+ */
+int vgic_gicrrd_mem_read(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr,
+		uint32_t offset, uint64_t *v);
+
+/**
+ * @brief GIC redistributor RD frame vdev mem write.
+ */
+int vgic_gicrrd_mem_write(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr,
+		uint32_t offset, uint64_t *v);
+
+/**
+ * @brief Get the gicr address type.
+ */
+struct virt_gic_gicr *get_vcpu_gicr_type(struct vgicv3_dev *vgic, uint32_t addr,
+		uint32_t *type, uint32_t *offset);
+/**
+ * @brief Raise an SGI signal to a vcpu.
+ */
+int vgicv3_raise_sgi(struct z_vcpu *vcpu, unsigned long sgi_value);
+
+/**
+ * @brief Init the vgicv3 device for a vm.
+ */
+struct vgicv3_dev *vgicv3_dev_init(struct z_vm *vm);
+
+/**
+ * @brief When a VM enables or disables an irq through these registers, zvm
+ * tests the related bit and sets it to the correct value. This function
+ * handles the irq enable/disable flags.
+ */
+static ALWAYS_INLINE void vgic_test_and_set_enable_bit(struct z_vcpu *vcpu, uint32_t spi_nr_count,
+		uint32_t *value, uint32_t bit_size, bool enable, void *vgic_priv)
+{
+	int bit;
+	uintptr_t reg_mem_addr = (uintptr_t)value;
+	struct virt_gic_gicd *gicd = NULL;
+	struct virt_gic_gicr *gicr = NULL;
+
+	for (bit = 0; bit < bit_size; bit++) {
+		if (enable) {
+			vgic_irq_enable(vcpu, spi_nr_count + bit);
+			if (spi_nr_count < VM_LOCAL_VIRQ_NR) {
+				gicr = (struct virt_gic_gicr *)vgic_priv;
+				vgic_sysreg_write32(vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_ISENABLER0) | BIT(bit),\
+					gicr->gicr_sgi_reg_base, VGICR_ISENABLER0);
+			} else {
+				gicd = (struct virt_gic_gicd *)vgic_priv;
+				vgic_sysreg_write32(vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_ISENABLERn) | BIT(bit),\
+					gicd->gicd_regs_base, VGICD_ISENABLERn);
+			}
+		} else {
+			/* TODO: add a path that fully disables the irq later */
+			if (*value != DEFAULT_DISABLE_IRQVAL) {
+				vgic_irq_disable(vcpu, spi_nr_count + bit);
+			}
+			if (spi_nr_count < VM_LOCAL_VIRQ_NR) {
+				gicr = (struct virt_gic_gicr *)vgic_priv;
+				vgic_sysreg_write32(vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_ICENABLER0) & ~BIT(bit),\
+					gicr->gicr_sgi_reg_base, VGICR_ICENABLER0);
+			} else {
+				gicd = (struct virt_gic_gicd *)vgic_priv;
+				/* Clear only this bit; OR-ing with ~BIT(bit) would set all other bits. */
+				vgic_sysreg_write32(vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_ICENABLERn) & ~BIT(bit),\
+					gicd->gicd_regs_base, VGICD_ICENABLERn);
+			}
+		}
+	}
+}
+
+/**
+ * @brief When a VM writes the ispending or icpending flag, we
+ * should set/unset the irq signal to the VM.
+ */
+static ALWAYS_INLINE void vgic_test_and_set_pending_bit(struct z_vcpu *vcpu, uint32_t spi_nr_count,
+		uint32_t *value, uint32_t bit_size, bool enable, void *vgic_priv)
+{
+	int bit;
+	uintptr_t reg_mem_addr = (uintptr_t)value;
+	struct virt_gic_gicd *gicd = NULL;
+	struct virt_gic_gicr *gicr = NULL;
+
+	for (bit = 0; bit < bit_size; bit++) {
+		if (enable) {
+			if (spi_nr_count + bit >= VM_GLOBAL_VIRQ_NR) {
+				/* spi num is too big.
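+				   Writes past VM_GLOBAL_VIRQ_NR would index beyond the
+				   vm's irq bookkeeping (see vm_virt_irq_block), so the
+				   request is silently dropped.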
*/
+				return;
+			}
+			set_virq_to_vm(vcpu->vm, spi_nr_count + bit);
+			if (spi_nr_count < VM_LOCAL_VIRQ_NR) {
+				gicr = (struct virt_gic_gicr *)vgic_priv;
+				vgic_sysreg_write32(vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_SGI_PENDING) | BIT(bit),\
+					gicr->gicr_sgi_reg_base, VGICR_SGI_PENDING);
+			} else {
+				gicd = (struct virt_gic_gicd *)vgic_priv;
+				vgic_sysreg_write32(vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_ISPENDRn) | BIT(bit),\
+					gicd->gicd_regs_base, VGICD_ISPENDRn);
+			}
+		} else {
+			if (spi_nr_count + bit >= VM_GLOBAL_VIRQ_NR) {
+				return;
+			}
+			unset_virq_to_vm(vcpu->vm, spi_nr_count + bit);
+			if (spi_nr_count < VM_LOCAL_VIRQ_NR) {
+				gicr = (struct virt_gic_gicr *)vgic_priv;
+				vgic_sysreg_write32(vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_SGI_ICPENDING) & ~BIT(bit),\
+					gicr->gicr_sgi_reg_base, VGICR_SGI_ICPENDING);
+			} else {
+				gicd = (struct virt_gic_gicd *)vgic_priv;
+				/* Clear only this bit; OR-ing with ~BIT(bit) would set all other bits. */
+				vgic_sysreg_write32(vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_ICPENDRn) & ~BIT(bit),\
+					gicd->gicd_regs_base, VGICD_ICPENDRn);
+			}
+		}
+	}
+}
+
+static ALWAYS_INLINE uint64_t gicv3_read_lr(uint8_t register_id)
+{
+	switch (register_id) {
+	case 0:
+		return read_sysreg(ICH_LR0_EL2);
+	case 1:
+		return read_sysreg(ICH_LR1_EL2);
+	case 2:
+		return read_sysreg(ICH_LR2_EL2);
+	case 3:
+		return read_sysreg(ICH_LR3_EL2);
+	case 4:
+		return read_sysreg(ICH_LR4_EL2);
+	case 5:
+		return read_sysreg(ICH_LR5_EL2);
+	case 6:
+		return read_sysreg(ICH_LR6_EL2);
+	case 7:
+		return read_sysreg(ICH_LR7_EL2);
+	default:
+		return 0;
+	}
+}
+
+static ALWAYS_INLINE void gicv3_write_lr(uint8_t register_id, uint64_t value)
+{
+	switch (register_id) {
+	case 0:
+		write_sysreg(value, ICH_LR0_EL2);
+		break;
+	case 1:
+		write_sysreg(value, ICH_LR1_EL2);
+		break;
+	case 2:
+		write_sysreg(value, ICH_LR2_EL2);
+		break;
+	case 3:
+		write_sysreg(value, ICH_LR3_EL2);
+		break;
+	case 4:
+		write_sysreg(value, ICH_LR4_EL2);
+		break;
+	case 5:
+		write_sysreg(value, ICH_LR5_EL2);
+		break;
+	case 6:
+		write_sysreg(value, ICH_LR6_EL2);
+		break;
+	case 7:
+		write_sysreg(value, ICH_LR7_EL2);
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * @brief Get the virq state from the list register.
+ */
+static ALWAYS_INLINE uint8_t gicv3_get_lr_state(struct z_vcpu *vcpu, struct virt_irq_desc *desc)
+{
+	uint64_t value;
+
+	if (desc->id >= VGIC_TYPER_LR_NUM) {
+		return 0;
+	}
+	value = gicv3_read_lr(desc->id);
+	value = (value >> 62) & 0x03;
+
+	return ((uint8_t)value);
+}
+
+/**
+ * @brief Find an idle list register.
+ */
+static ALWAYS_INLINE uint8_t gicv3_get_idle_lr(struct z_vcpu *vcpu)
+{
+	uint8_t i;
+	for (i=0; iid, 0);
+		VGIC_LIST_REGS_UNSET(desc->id, vcpu);
+		break;
+	case ACTION_SET_VIRQ:
+		gicv3_write_lr(desc->id, value);
+		VGIC_LIST_REGS_SET(desc->id, vcpu);
+		break;
+	}
+}
+
+#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_ARM_VGIC_V3_H_ */
diff --git a/include/zephyr/zvm/vdev/virt_psci.h b/include/zephyr/zvm/vdev/virt_psci.h
new file mode 100644
index 00000000000000..5b0b1b2ba9bba8
--- /dev/null
+++ b/include/zephyr/zvm/vdev/virt_psci.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_VDEV_VIRT_PSCI_H_ +#define ZEPHYR_INCLUDE_VIRTUALIZATION_VDEV_VIRT_PSCI_H_ + +#include +#include + +/* psci func for vcpu */ +uint64_t psci_vcpu_suspend(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); +uint64_t psci_vcpu_off(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); +uint64_t psci_vcpu_affinity_info(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); +uint64_t psci_vcpu_migration(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); +uint64_t psci_vcpu_migration_info_type(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); +uint64_t psci_vcpu_other(unsigned long psci_func); +uint64_t psci_vcpu_on(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); + +int do_psci_call(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt); + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_VDEV_VIRT_PSCI_H_ */ \ No newline at end of file diff --git a/include/zephyr/zvm/vdev/vpl011.h b/include/zephyr/zvm/vdev/vpl011.h new file mode 100644 index 00000000000000..b60558c7e6ff7c --- /dev/null +++ b/include/zephyr/zvm/vdev/vpl011.h @@ -0,0 +1,122 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Yuhao Hu, Qingqiao Wang and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_ZVM_VDEV_VPL011_H_ +#define ZEPHYR_INCLUDE_ZVM_VDEV_VPL011_H_ + +#include +#include + +#define VSERIAL_REG_BASE DT_REG_ADDR(DT_INST(0, arm_pl011)) +#define VSERIAL_REG_SIZE DT_REG_SIZE(DT_INST(0, arm_pl011)) +#define VSERIAL_HIRQ_NUM DT_IRQN(DT_INST(0, arm_pl011)) + +#define ARM_PL011_ID {0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1} +#define PL011_INT_TX 0x20 +#define PL011_INT_RX 0x10 +#define PL011_FLAG_TXFE 0x80 +#define PL011_FLAG_RXFF 0x40 +#define PL011_FLAG_TXFF 0x20 +#define PL011_FLAG_RXFE 0x10 + +/* Regs Write/Read 32/64 Op*/ +#define vserial_sysreg_read32(base, offset) sys_read32((long unsigned int)(base+((offset)/4))) +#define vserial_sysreg_write32(data, base, offset) sys_write32(data, (long unsigned int)(base+((offset)/4))) +#define vserial_sysreg_read64(base, offset) sys_read64((long unsigned int)(base+((offset)/4))) +#define vserial_sysreg_write64(data, base, offset) sys_write64(data, (long unsigned int)(base+((offset)/4))) + + +/* + * VUART PL011 register map structure + */ +struct vpl011_regs_ctxt { + uint32_t dr; /* base + 0x00 , 0*/ + union { /* base + 0x04 , 1*/ + uint32_t rsr; + uint32_t ecr; + }; + uint32_t reserved_0[4]; /* base + 0x08 , 2 ~ 5*/ + uint32_t fr; /* base + 0x18 , 6*/ + uint32_t reserved_1; /* base + 0x1c , 7*/ + uint32_t ilpr; /* base + 0x20 , 8*/ + uint32_t ibrd; /* base + 0x24 , 9*/ + uint32_t fbrd; /* base + 0x28 , 10*/ + uint32_t lcr_h; /* base + 0x2c , 11*/ + uint32_t cr; /* base + 0x30 , 12*/ + uint32_t ifls; /* base + 0x34 , 13*/ + uint32_t imsc; /* base + 0x38 , 14*/ + uint32_t ris; /* base + 0x3c , 15*/ + uint32_t mis; /* base + 0x40 , 16*/ + uint32_t icr; /* base + 0x44 , 17*/ + uint32_t dmacr; /* base + 0x48 , 18*/ + uint8_t id[8]; /* base + 0xfe0 */ +}; + +#define FIFO_SIZE 16 + +#define VPL011_BIT_MASK(x, y) (((2 << x) - 1) << y) +/* VPL011 Uart Flags Register */ +#define VPL011_FR_CTS BIT(0) /* clear to send - inverted */ +#define VPL011_FR_DSR BIT(1) /* data set ready - inverted */ +#define VPL011_FR_DCD BIT(2) /* data carrier detect - inverted */ +#define VPL011_FR_BUSY BIT(3) /* busy transmitting data */ +#define VPL011_FR_RXFE BIT(4) /* receive FIFO empty */ +#define 
VPL011_FR_TXFF BIT(5)	/* transmit FIFO full */
+#define VPL011_FR_RXFF BIT(6)	/* receive FIFO full */
+#define VPL011_FR_TXFE BIT(7)	/* transmit FIFO empty */
+#define VPL011_FR_RI BIT(8)	/* ring indicator - inverted */
+
+#define VPL011_INT_RX 0x10
+#define VPL011_INT_TX 0x20
+/* VPL011 Interrupt Mask Set/Clear Register */
+#define VPL011_IMSC_RIMIM BIT(0)	/* RTR modem interrupt mask */
+#define VPL011_IMSC_CTSMIM BIT(1)	/* CTS modem interrupt mask */
+#define VPL011_IMSC_DCDMIM BIT(2)	/* DCD modem interrupt mask */
+#define VPL011_IMSC_DSRMIM BIT(3)	/* DSR modem interrupt mask */
+#define VPL011_IMSC_RXIM BIT(4)	/* receive interrupt mask */
+#define VPL011_IMSC_TXIM BIT(5)	/* transmit interrupt mask */
+#define VPL011_IMSC_RTIM BIT(6)	/* receive timeout interrupt mask */
+#define VPL011_IMSC_FEIM BIT(7)	/* framing error interrupt mask */
+#define VPL011_IMSC_PEIM BIT(8)	/* parity error interrupt mask */
+#define VPL011_IMSC_BEIM BIT(9)	/* break error interrupt mask */
+#define VPL011_IMSC_OEIM BIT(10)	/* overrun error interrupt mask */
+
+#define VPL011_PRIV(vdev) \
+	((struct virt_pl011 *)(vdev)->priv_vdev)
+#define VDEV_REGS(vdev) \
+	((volatile struct vpl011_regs_ctxt *)(VPL011_PRIV(vdev))->vserial_reg_base)
+#define VPL011_REGS(vpl011) \
+	((volatile struct vpl011_regs_ctxt *)(vpl011)->vserial_reg_base)
+
+struct virt_pl011 {
+	/* virtual serial backing registers emulated for the vm. */
+	uint32_t *vserial_reg_base;
+
+	/**
+	 * serial base address and size, used to
+	 * locate vdev accesses from the vm.
+	 */
+	uint32_t vserial_base;
+	uint32_t vserial_size;
+
+	struct z_vm *vm;
+	struct k_fifo rx_fifo;
+	struct k_spinlock vserial_lock;
+
+	uint32_t irq;
+	uint32_t enabled;
+	uint32_t level;
+	uint32_t set_irq;
+	uint32_t count;
+
+	bool connecting;
+	void *vserial;
+};
+
+#endif /* ZEPHYR_INCLUDE_ZVM_VDEV_VPL011_H_ */
diff --git a/include/zephyr/zvm/vdev/vserial.h b/include/zephyr/zvm/vdev/vserial.h
new file mode 100644
index 00000000000000..bb10b300c1573c
--- /dev/null
+++ b/include/zephyr/zvm/vdev/vserial.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Yuhao Hu, Qingqiao Wang and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_ZVM_VIRT_SERIAL_H_
+#define ZEPHYR_INCLUDE_ZVM_VIRT_SERIAL_H_
+
+#include
+#include
+#include
+#include
+
+#define VM_FIELD_NAME_SIZE 32
+#define EXIT_VSERIAL_KEY 0x18 /* CTRL+X */
+#define SEND_BUFFER_SIZE 16
+#define VIRT_SERIAL_NAME vpl011
+
+/**
+ * @brief data struct for k_fifo
+ */
+struct K_fifo_data {
+	intptr_t _unused;
+	uint8_t data[1];
+};
+
+/** Virtual serial port */
+struct virt_serial {
+	sys_dnode_t node;
+	char name[VM_FIELD_NAME_SIZE];
+	int (*send)(struct virt_serial *vserial, unsigned char *data, int len);
+	struct K_fifo_data send_buffer[SEND_BUFFER_SIZE];
+	uint32_t count;
+	void *priv;
+	void *vm;
+};
+
+struct z_virt_serial_ctrl {
+	struct k_mutex virt_serial_list_lock;
+	sys_dlist_t virt_serial_list;
+	bool connecting;
+	uint8_t connecting_vm_id;
+	struct virt_serial *connecting_virt_serial;
+};
+
+static inline void *get_virt_serial_device(struct virt_serial *vserial)
+{
+	return (vserial) ?
vserial->priv : NULL;
+}
+
+/**
+ * @brief Create a virtual serial port
+ */
+struct virt_serial *virt_serial_create(const char *name,
+		int (*send)(struct virt_serial *, unsigned char *, int),
+		void *priv);
+/**
+ * @brief Destroy a virtual serial port
+ */
+int virt_serial_destroy(struct virt_serial *vserial);
+
+/** Count of available virtual serial ports */
+uint32_t virt_serial_count(void);
+
+struct virt_serial *get_vserial(uint8_t vmid);
+
+void transfer(const struct shell *shell, unsigned char *data, size_t len);
+
+void uart_poll_out_to_host(unsigned char data);
+
+int switch_virtual_serial_handler(const struct shell *shell, size_t argc, char **argv);
+
+#endif /* ZEPHYR_INCLUDE_ZVM_VIRT_SERIAL_H_ */
diff --git a/include/zephyr/zvm/vm.h b/include/zephyr/zvm/vm.h
new file mode 100644
index 00000000000000..f27a4de725365b
--- /dev/null
+++ b/include/zephyr/zvm/vm.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_ZVM_VM_H_
+#define ZEPHYR_INCLUDE_ZVM_VM_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define DEFAULT_VM (0)
+#define VM_NAME_LEN (32)
+#define VCPU_NAME_LEN (32)
+#define RAMDISK_NAME_LEN (32)
+
+#define VCPU_THREAD_STACKSIZE (40960)
+#define VCPU_THREAD_PRIO (1)
+
+/**
+ * @brief VM status.
+ */
+#define VM_STATE_NEVER_RUN (BIT(0))
+#define VM_STATE_RUNNING (BIT(1))
+#define VM_STATE_PAUSE (BIT(2))
+#define VM_STATE_HALT (BIT(3))
+#define VM_STATE_RESET (BIT(4))
+
+/**
+ * @brief VM return values.
+ */
+#define SET_IRQ_TO_VM_SUCCESS (1)
+#define UNSET_IRQ_TO_VM_SUCCESS (2)
+#define VM_IRQ_TO_VCPU_SUCCESS (3)
+#define VM_IRQ_NUM_OUT (99)
+
+#define _VCPU_STATE_READY (BIT(0))
+#define _VCPU_STATE_RUNNING (BIT(1))
+#define _VCPU_STATE_PAUSED (BIT(2))
+#define _VCPU_STATE_HALTED (BIT(3))
+#define _VCPU_STATE_UNKNOWN (BIT(4))
+#define _VCPU_STATE_RESET (BIT(5))
+
+#define VCPU_THREAD(thread) (((struct k_thread *)(thread))->vcpu_struct ? true : false)
+
+#ifdef CONFIG_ZVM
+#define _current_vcpu _current->vcpu_struct
+#else
+#define _current_vcpu NULL
+#endif
+
+#define get_current_vcpu_id() \
+({ \
+	struct z_vcpu *vcpu = (struct z_vcpu *)_current_vcpu; \
+	vcpu->vcpu_id; \
+})
+
+#define get_current_vm() \
+({ \
+	struct z_vcpu *vcpu = (struct z_vcpu *)_current_vcpu; \
+	vcpu->vm; \
+})
+
+#define vcpu_need_switch(tid1, tid2) ((VCPU_THREAD(tid1)) || (VCPU_THREAD(tid2)))
+
+/* VM debug console uart hardware info. */
+#define VM_DEBUG_CONSOLE_BASE DT_REG_ADDR(DT_CHOSEN(vm_console))
+#define VM_DEBUG_CONSOLE_SIZE DT_REG_SIZE(DT_CHOSEN(vm_console))
+#define VM_DEBUG_CONSOLE_IRQ DT_IRQN(DT_CHOSEN(vm_console))
+
+#define VM_DEFAULT_CONSOLE_NAME "UART"
+#define VM_DEFAULT_CONSOLE_NAME_LEN (4)
+
+struct z_vcpu {
+	struct vcpu_arch *arch;
+	bool resume_signal;
+	bool waitq_flag;
+
+	uint16_t vcpu_id;
+	uint16_t cpu;
+	uint16_t vcpu_state;
+	uint16_t exit_type;
+
+	/**
+	 * A vcpu may be influenced by the host cpu, so we record the
+	 * vcpu ipi status here. Whenever the vcpu calls xxx_raise_sgi,
+	 * vcpuipi_count is incremented. The default value is 0.
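+	 * (vgicv3_raise_sgi() is one such caller; reading vcpuipi_count
+	 * from a debugger is a quick way to spot IPI storms.)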
+	 */
+	uint64_t vcpuipi_count;
+
+	/* vcpu timer records */
+	uint32_t hcpu_cycles;
+	uint32_t runnig_cycles;
+	uint32_t paused_cycles;
+
+	struct k_spinlock vcpu_lock;
+
+	/* virt irq block for this vcpu */
+	struct vcpu_virt_irq_block virq_block;
+
+	struct z_vcpu *next_vcpu;
+	struct vcpu_work *work;
+	struct z_vm *vm;
+
+	/* vcpu's thread wait queue */
+	_wait_q_t *t_wq;
+
+	sys_dlist_t vcpu_lists;
+
+	bool is_poweroff;
+};
+typedef struct z_vcpu vcpu_t;
+
+/**
+ * @brief Describes the thread that a vcpu binds to.
+ */
+struct __aligned(4) vcpu_work {
+	/* statically allocated stack space */
+	K_KERNEL_STACK_MEMBER(vt_stack, VCPU_THREAD_STACKSIZE);
+
+	/* vCPU thread */
+	struct k_thread *vcpu_thread;
+
+	/* points to the vcpu struct */
+	void *v_date;
+};
+
+/**
+ * @brief The initial information used to create the virtual machine.
+ */
+struct vm_desc {
+	uint16_t vmid;
+	char name[VM_NAME_LEN];
+
+	char vm_dtb_image_name[RAMDISK_NAME_LEN];
+	char vm_kernel_image_name[RAMDISK_NAME_LEN];
+
+	int32_t vcpu_num;
+	uint64_t mem_base;
+	uint64_t mem_size;
+
+	/* vm's code entry */
+	uint64_t entry;
+
+	/* vm's states */
+	uint64_t flags;
+	uint64_t image_load_address;
+};
+
+/**
+ * @brief Record the vm's vcpu count.
+ * Recommendation: consider deleting.
+ */
+struct vm_vcpu_num {
+	uint16_t count;
+	struct k_spinlock vcpu_id_lock;
+};
+
+struct vm_arch {
+	uint64_t vm_pgd_base;
+	uint64_t vttbr;
+	uint64_t vtcr_el2;
+};
+
+struct z_vm {
+	bool is_rtos;
+	uint16_t vmid;
+	char vm_name[VM_NAME_LEN];
+	bool reboot;
+
+	uint32_t vm_status;
+
+	uint32_t vcpu_num;
+	struct vm_vcpu_num vm_vcpu_id_count;
+
+	uint32_t vtimer_offset;
+
+	struct vm_virt_irq_block vm_irq_block;
+
+	struct k_spinlock spinlock;
+
+	struct z_vcpu **vcpus;
+	struct k_sem *vcpu_exit_sem;
+
+	struct vm_arch *arch;
+	struct vm_mem_domain *vmem_domain;
+	struct z_os *os;
+
+	/* bind the vm to the os-type ops */
+	struct zvm_ops *ops;
+
+	/* store the vm's dev list */
+	sys_dlist_t vdev_list;
+};
+
+int vm_ops_init(struct z_vm *vm);
+
+/**
+ * @brief Init the guest vm memory manager:
+ * this function initializes the vm's memory manager in the following steps:
+ * 1. Allocate virt space to the vm (base/size), and distribute vpart_list to it.
+ * 2. Add this vpart to mapped_vpart_list.
+ * 3. Divide the vpart area into blocks, init the block list,
+ *    then allocate physical space to these blocks.
+ * 4. Build the page table from the vpart virt address to the block physical address.
+ *
+ * @param vm: The vm whose memory needs to be initialized.
+ * @return int 0 for success
+ */
+int vm_mem_init(struct z_vm *vm);
+
+int vm_vcpus_create(uint16_t vcpu_num, struct z_vm *vm);
+int vm_vcpus_init(struct z_vm *vm);
+int vm_vcpus_ready(struct z_vm *vm);
+int vm_vcpus_pause(struct z_vm *vm);
+int vm_vcpus_halt(struct z_vm *vm);
+int vm_vcpus_reset(struct z_vm *vm);
+int vm_delete(struct z_vm *vm);
+
+int z_parse_run_vm_args(size_t argc, char **argv);
+int z_parse_pause_vm_args(size_t argc, char **argv);
+int z_parse_delete_vm_args(size_t argc, char **argv);
+int z_parse_info_vm_args(size_t argc, char **argv);
+
+int z_parse_new_vm_args(size_t argc, char **argv, struct z_os_info *vm_info, struct z_vm *vm);
+
+int z_list_vms_info(uint16_t vmid);
+int vm_sysinfo_init(size_t argc, char **argv, struct z_vm *vm_ptr, struct z_os_info *vm_info);
+
+int vm_ipi_handler(struct z_vm *vm);
+
+int vm_create(struct z_os_info *vm_info, struct z_vm *new_vm);
+
+#endif /* ZEPHYR_INCLUDE_ZVM_VM_H_ */
diff --git a/include/zephyr/zvm/vm_cpu.h b/include/zephyr/zvm/vm_cpu.h
new file mode 100644
index 00000000000000..9ccf9ba85b1ba2
--- /dev/null
+++ b/include/zephyr/zvm/vm_cpu.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_ZVM_VM_CPU_H_
+#define ZEPHYR_INCLUDE_ZVM_VM_CPU_H_
+
+#include
+#include
+#include
+#ifdef CONFIG_ARM64
+#include
+#include
+#include
+#include
+#endif
+
+/**
+ * @brief Overall thread priorities for the zvm system.
+ * A total of 15 priorities are defined by setting
+ * CONFIG_NUM_PREEMPT_PRIORITIES = 15 and can be divided into three categories:
+ * 1 -> 5: high real-time requirement and very critical to the system.
+ * 6 -> 10: no real-time requirement but still critical to the system.
+ * 11 -> 15: normal.
+ */
+#define RT_VM_WORK_PRIORITY (5)
+#define NORT_VM_WORK_PRIORITY (10)
+
+#ifdef CONFIG_PREEMPT_ENABLED
+/* positive numbers */
+#define VCPU_RT_PRIO RT_VM_WORK_PRIORITY
+#define VCPU_NORT_PRIO NORT_VM_WORK_PRIORITY
+#else
+/* negative numbers */
+#define VCPU_RT_PRIO (K_HIGHEST_THREAD_PRIO + RT_VM_WORK_PRIORITY)
+#define VCPU_NORT_PRIO (K_HIGHEST_THREAD_PRIO + NORT_VM_WORK_PRIORITY)
+#endif
+
+#define VCPU_IPI_MASK_ALL (0xffffffff)
+
+#define DEFAULT_VCPU (0)
+
+/* Forward declaration to avoid a compiler warning. */
+struct vcpu;
+
+volatile static uint32_t used_cpus = 0;
+static struct k_spinlock cpu_mask_lock;
+
+/**
+ * @brief Allocate a vcpu struct and init it.
+ */
+struct z_vcpu *vm_vcpu_init(struct z_vm *vm, uint16_t vcpu_id, char *vcpu_name);
+
+/**
+ * @brief Release a vcpu struct.
+ */
+int vm_vcpu_deinit(struct z_vcpu *vcpu);
+
+/**
+ * @brief A vcpu has the following states:
+ * running: the vcpu is running and is allocated to a physical cpu.
+ * ready: prepared to run.
+ */
+int vm_vcpu_ready(struct z_vcpu *vcpu);
+int vm_vcpu_pause(struct z_vcpu *vcpu);
+int vm_vcpu_halt(struct z_vcpu *vcpu);
+int vm_vcpu_reset(struct z_vcpu *vcpu);
+
+/**
+ * @brief vcpu run function entry.
+ */
+int vcpu_thread_entry(struct z_vcpu *vcpu);
+
+int vcpu_state_switch(struct k_thread *thread, uint16_t new_state);
+
+void do_vcpu_swap(struct k_thread *new_thread, struct k_thread *old_thread);
+void do_asm_vcpu_swap(struct k_thread *new_thread, struct k_thread *old_thread);
+
+/**
+ * @brief vcpu ipi scheduler that asks the system scheduler to schedule vcpus.
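+ *
+ * A minimal usage sketch (assuming the mask bits select target physical
+ * cpus, as the VCPU_IPI_MASK_ALL default suggests, and 0 means no wait):
+ * @code{.c}
+ * vcpu_ipi_scheduler(VCPU_IPI_MASK_ALL, 0);
+ * @endcode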
+ */
+int vcpu_ipi_scheduler(uint32_t cpu_mask, uint32_t timeout);
+
+static ALWAYS_INLINE int rt_get_idle_cpu(void)
+{
+	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
+#ifdef CONFIG_SMP
+		/* In SMP, _current is a field read from _current_cpu, which
+		 * can race with preemption before it is read. We must lock
+		 * local interrupts when reading it.
+		 */
+		unsigned int k = arch_irq_lock();
+#endif
+		k_tid_t tid = _kernel.cpus[i].current;
+#ifdef CONFIG_SMP
+		arch_irq_unlock(k);
+#endif
+		int prio = k_thread_priority_get(tid);
+		if (prio == K_IDLE_PRIO || (prio < K_IDLE_PRIO && prio > VCPU_RT_PRIO)) {
+			return i;
+		}
+	}
+	return -ESRCH;
+}
+
+static ALWAYS_INLINE int nrt_get_idle_cpu(void)
+{
+	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
+#ifdef CONFIG_SMP
+		/* In SMP, _current is a field read from _current_cpu, which
+		 * can race with preemption before it is read. We must lock
+		 * local interrupts when reading it.
+		 */
+		unsigned int k = arch_irq_lock();
+#endif
+		k_tid_t tid = _kernel.cpus[i].current;
+#ifdef CONFIG_SMP
+		arch_irq_unlock(k);
+#endif
+		int prio = k_thread_priority_get(tid);
+		if (prio == K_IDLE_PRIO) {
+			return i;
+		}
+	}
+	return -ESRCH;
+}
+
+static ALWAYS_INLINE int get_static_idle_cpu(void)
+{
+	k_spinlock_key_t key;
+
+	for (int i = 1; i < CONFIG_MP_NUM_CPUS; i++) {
+#ifdef CONFIG_SMP
+		/* In SMP, _current is a field read from _current_cpu, which
+		 * can race with preemption before it is read. We must lock
+		 * local interrupts when reading it.
+		 */
+		unsigned int k = arch_irq_lock();
+#endif
+		k_tid_t tid = _kernel.cpus[i].current;
+#ifdef CONFIG_SMP
+		arch_irq_unlock(k);
+#endif
+		int prio = k_thread_priority_get(tid);
+		if (prio == K_IDLE_PRIO && !(used_cpus & (1 << i))) {
+			key = k_spin_lock(&cpu_mask_lock);
+			used_cpus |= (1 << i);
+			k_spin_unlock(&cpu_mask_lock, key);
+			return i;
+		}
+	}
+	return -ESRCH;
}

+static ALWAYS_INLINE void reset_idle_cpu(uint16_t cpu_id)
+{
+	k_spinlock_key_t key;
+
+	if (used_cpus & (1 << cpu_id)) {
+		key = k_spin_lock(&cpu_mask_lock);
+		used_cpus &= ~(1 << cpu_id);
+		k_spin_unlock(&cpu_mask_lock, key);
+	}
+	barrier_isync_fence_full();
+}
+
+#endif /* ZEPHYR_INCLUDE_ZVM_VM_CPU_H_ */
diff --git a/include/zephyr/zvm/vm_device.h b/include/zephyr/zvm/vm_device.h
new file mode 100644
index 00000000000000..a55a58ae30fcb0
--- /dev/null
+++ b/include/zephyr/zvm/vm_device.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Charlie, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_ZVM_VM_DEVICE_H_
+#define ZEPHYR_INCLUDE_ZVM_VM_DEVICE_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#define VIRT_DEV_NAME_LENGTH (32)
+#define VIRT_DEV_TYPE_LENGTH (32)
+
+#define DEV_TYPE_EMULATE_ALL_DEVICE (0x01)
+#define DEV_TYPE_VIRTIO_DEVICE (0x02)
+#define DEV_TYPE_PASSTHROUGH_DEVICE (0x03)
+
+#define VM_DEVICE_INIT_RES (0xFF)
+#define VM_DEVICE_INVALID_BASE (0xFFFFFFFF)
+#define VM_DEVICE_INVALID_VIRQ (0xFF)
+
+/**
+ * @brief DEVICE_TYPE for each vm, which is used to judge whether it is
+ * necessary to init the device when the vm is created.
+ */
+#define VM_DEVICE_PRE_KERNEL_1 (0x01)
+
+#define TOSTRING(x) STRINGIFY(x)
+
+typedef void (*virt_device_irq_callback_user_data_set_t)(const struct device *dev,
+		void *cb, void *user_data);
+
+struct z_virt_dev {
+	/* name of the virtual device */
+	char name[VIRT_DEV_NAME_LENGTH];
+
+	/* Is this dev a pass-through device?
 */
+	bool dev_pt_flag;
+	/* Is this dev a virtio device? */
+	bool shareable;
+
+	uint32_t hirq;
+	uint32_t virq;
+
+	uint32_t vm_vdev_paddr;
+	uint32_t vm_vdev_vaddr;
+	uint32_t vm_vdev_size;
+
+	struct _dnode vdev_node;
+	struct z_vm *vm;
+
+	/**
+	 * Device private data is useful in two ways:
+	 * 1. For a fully virtual device, it stores the emulated device's
+	 *    driver, for example a virt_device instance.
+	 * 2. For a passthrough device, it stores the hardware instance data.
+	 */
+	const void *priv_data;
+
+	/**
+	 * Binding to the full virtual device driver.
+	 */
+	void *priv_vdev;
+};
+typedef struct z_virt_dev virt_dev_t;
+
+/**
+ * @brief Private data of a virtual device for a vm.
+ */
+struct virt_device_data {
+	/* virtual device type for the vm, see the 'DEV_TYPE_*' macros. */
+	uint16_t vdevice_type;
+	/* the virt device data port */
+	void *device_data;
+#ifdef CONFIG_VIRT_DEVICE_INTERRUPT_DRIVEN
+	virt_device_irq_callback_user_data_set_t irq_cb;
+	void *irq_cb_data;
+#endif
+};
+
+/**
+ * @brief Get the device instance from the dts, which includes
+ * the original `device/config` from zephyr's device framework.
+ */
+struct virt_device_config {
+	/* Register base and size from the dts */
+	uint32_t reg_base;
+	uint32_t reg_size;
+	uint32_t hirq_num;
+	char device_type[VIRT_DEV_TYPE_LENGTH];
+	/* Address of the device instance config information */
+	const void *device_config;
+};
+
+/**
+ * @brief A virt device api for init/deinit or read/write device.
+ */
+struct virt_device_api {
+	int (*init_fn)(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc);
+	int (*deinit_fn)(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc);
+	int (*virt_device_write)(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size);
+	int (*virt_device_read)(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size);
+#ifdef CONFIG_VIRT_DEVICE_INTERRUPT_DRIVEN
+	void (*virt_irq_callback_set)(const struct device *dev, void *cb, void *user_data);
+#endif
+	/* The device driver api, if the device driver is initialized in the host */
+	const void *device_driver_api;
+};
+
+/**
+ * @brief Virtual device backend instance in zvm.
+ */
+struct virtual_device_instance {
+	const char *name;
+	struct virt_device_data *data;
+	struct virt_device_config *cfg;
+	const struct virt_device_api *api;
+};
+
+/**
+ * @brief Macro for creating a virtual device instance.
+ *
+ * @param _init_fn Init function for the virtual device.
+ * @param _level Init level.
+ * @param _prio Init priority.
+ * @param _name Name of the virtual device instance.
+ * @param _data Data of the virtual device instance.
+ * @param _cfg Configuration of the virtual device.
+ * @param _api Virtual device backend API.
+ * @param ... Optional context.
+ */
+#define ZVM_VIRTUAL_DEVICE_DEFINE(_init_fn, _level, _prio, _name, _data, _cfg, _api, ...) \
+	SYS_INIT_NAMED(_init_fn, _init_fn, _level, _prio); \
+	static const STRUCT_SECTION_ITERABLE(virtual_device_instance, _name) = \
+	{ \
+		.name = STRINGIFY(_name), \
+		.data = &_data, \
+		.cfg = &_cfg, \
+		.api = &_api, \
+	}
+
+/* The overall virtual device instances. */
+extern const struct virtual_device_instance __virtual_device_instances_start[];
+extern const struct virtual_device_instance __virtual_device_instances_end[];
+
+/**
+ * @brief Holds the overall idle/used dev lists.
+ * SMP conditions must be considered here.
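+ *
+ * Illustrative traversal (a sketch; assumes the lists link
+ * struct z_virt_dev nodes through their vdev_node member):
+ * @code{.c}
+ * struct z_virt_dev *vdev;
+ *
+ * SYS_DLIST_FOR_EACH_CONTAINER(&dev_lists->dev_idle_list, vdev, vdev_node) {
+ *     printk("idle dev: %s\n", vdev->name);
+ * }
+ * @endcode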
+ */
+struct zvm_dev_lists {
+	uint16_t dev_count;
+	sys_dlist_t dev_idle_list;
+	sys_dlist_t dev_used_list;
+	/* TODO: Add an smp lock here */
+};
+
+struct device_chosen {
+	bool chosen_flag;
+	struct k_spinlock lock;
+};
+
+/**
+ * @brief Get a virtual device.
+ *
+ * @return Pointer to the virtual device instance.
+ */
+static inline const struct virtual_device_instance *zvm_virtual_device_get(uint32_t idx)
+{
+	return &__virtual_device_instances_start[idx];
+}
+
+/**
+ * @brief Get the number of virtual devices.
+ *
+ * @return Number of virtual devices.
+ */
+static inline int zvm_virtual_devices_count_get(void)
+{
+	return __virtual_device_instances_end - __virtual_device_instances_start;
+}
+
+/**
+ * @brief Set the IRQ callback function pointer.
+ *
+ * This sets up the callback for IRQ. When an IRQ is triggered,
+ * the specified function will be called with the specified user data.
+ *
+ * @param dev virt device structure.
+ * @param cb Pointer to the callback function.
+ * @param user_data Data to pass to the callback function.
+ */
+static inline void vdev_irq_callback_user_data_set(const struct device *dev,
+		void *cb, void *user_data)
+{
+#ifdef CONFIG_VIRT_DEVICE_INTERRUPT_DRIVEN
+	const struct virt_device_api *api =
+		(const struct virt_device_api *)dev->api;
+
+	if ((api != NULL) && (api->virt_irq_callback_set != NULL)) {
+		api->virt_irq_callback_set(dev, cb, user_data);
+	}
+#endif
+}
+
+/**
+ * @brief Allocate a device to a vm; called when a device is about to be
+ * allocated to the vm. It also sets the device's irq, binding the virt
+ * interrupt to the hardware interrupt.
+ *
+ * @return virt device instance.
+ */
+struct z_virt_dev *allocate_device_to_vm(const struct device *dev, struct z_vm *vm,
+		struct z_virt_dev *vdev_desc, bool pt_flag, bool shareable);
+
+/**
+ * @brief The vm virt device callback function, called when the device
+ * allocated to the vm is triggered.
+ */
+void vm_device_callback_func(const struct device *dev, void *cb, void *user_data);
+
+struct z_virt_dev *vm_virt_dev_add(struct z_vm *vm, const char *dev_name, bool pt_flag,
+		bool shareable, uint64_t dev_pbase, uint64_t dev_vbase,
+		uint32_t dev_size, uint32_t dev_hirq, uint32_t dev_virq);
+
+int vm_virt_dev_remove(struct z_vm *vm, struct z_virt_dev *vm_dev);
+
+/**
+ * @brief Write or read a vdev for a VM operation.
+ */
+int vdev_mmio_abort(arch_commom_regs_t *regs, int write, uint64_t addr, uint64_t *value, uint16_t size);
+
+/**
+ * @brief Unmap a passthrough device.
+ */
+int vm_unmap_ptdev(struct z_virt_dev *vdev, uint64_t vm_dev_base,
+		uint64_t vm_dev_size, struct z_vm *vm);
+
+int vm_vdev_pause(struct z_vcpu *vcpu);
+
+/**
+ * @brief Handle the VM's device memory access. When pa_addr is
+ * located at an idle device, two things need to happen:
+ * 1. Build a stage-2 translation table for this vm, which
+ *    can then directly access this memory later.
+ * 2. Rerun the faulting code and access the physical device memory.
+ */
+int handle_vm_device_emulate(struct z_vm *vm, uint64_t pa_addr);
+
+void virt_device_irq_callback_data_set(int irq, int priority, void *user_data);
+
+int vm_device_init(struct z_vm *vm);
+int vm_device_deinit(struct z_vm *vm);
+
+#endif /* ZEPHYR_INCLUDE_ZVM_VM_DEVICE_H_ */
diff --git a/include/zephyr/zvm/vm_irq.h b/include/zephyr/zvm/vm_irq.h
new file mode 100644
index 00000000000000..55de4f2621c578
--- /dev/null
+++ b/include/zephyr/zvm/vm_irq.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_VM_IRQ_H_
+#define ZEPHYR_INCLUDE_VIRTUALIZATION_VM_IRQ_H_
+
+#include
+#include
+#include
+#include
+#include
+
+/* TODO: HW_FLAG may not be enabled for each spi. */
+#define VIRQ_HW_FLAG BIT(0)
+#define VIRQ_PENDING_FLAG BIT(1)
+#define VIRQ_ACTIVED_FLAG BIT(2)
+#define VIRQ_ENABLED_FLAG BIT(3)
+#define VIRQ_WAKEUP_FLAG BIT(4)
+
+/* Hardware irq states */
+#define VIRQ_STATE_INVALID (0b00)
+#define VIRQ_STATE_PENDING (0b01)
+#define VIRQ_STATE_ACTIVE (0b10)
+#define VIRQ_STATE_ACTIVE_AND_PENDING (0b11)
+
+/* VM's injected irq num, bound to the register */
+#define VM_INVALID_DESC_ID (0xFF)
+
+/* VM's irq prio */
+#define VM_DEFAULT_LOCAL_VIRQ_PRIO (0x20)
+
+/* irq numbers for the arm64 system. */
+#define VM_LOCAL_VIRQ_NR (VM_SGI_VIRQ_NR + VM_PPI_VIRQ_NR)
+#define VM_GLOBAL_VIRQ_NR (VM_LOCAL_VIRQ_NR + VM_SPI_VIRQ_NR)
+
+struct z_vm;
+struct z_virt_dev;
+
+/**
+ * @brief Description of each virt irq descriptor.
+ */
+struct virt_irq_desc {
+	/* Id that describes the irq. */
+	uint8_t id;
+	uint8_t vcpu_id;
+	uint8_t vm_id;
+	uint8_t prio;
+	/* software dev trigger level flag */
+	uint8_t vdev_trigger;
+
+	/* irq level type */
+	uint8_t type;
+	uint8_t src_cpu;
+
+	/* hardware virq states */
+	uint8_t virq_states;
+	/* software virq flags */
+	uint32_t virq_flags;
+
+	/**
+	 * If a physical irq exists, pirq_num holds its number;
+	 * otherwise, it is set to 0xFFFFFFFF.
+	 */
+	uint32_t pirq_num;
+	uint32_t virq_num;
+
+	sys_dnode_t desc_node;
+};
+
+/* vcpu wfi struct */
+struct vcpu_wfi {
+	bool state;
+	uint16_t yeild_count;
+	struct k_spinlock wfi_lock;
+	void *priv;
+};
+
+/**
+ * @brief The vcpu's irq block, describing all the device interrupts
+ * local to a vcpu; it covers `VM_LOCAL_VIRQ_NR` irqs.
+ */
+struct vcpu_virt_irq_block {
+	/**
+	 * Counts the virt irqs that are active: when a virt irq is
+	 * sent to the vm, zvm records it here, meaning there is a
+	 * virt irq that needs to be processed.
+	 */
+	uint32_t virq_pending_counts;
+	uint32_t pending_sgi_num;
+
+	struct virt_irq_desc vcpu_virt_irq_desc[VM_LOCAL_VIRQ_NR];
+	struct vcpu_wfi vwfi;
+
+	struct k_spinlock spinlock;
+
+	sys_dlist_t active_irqs;
+	sys_dlist_t pending_irqs;
+};
+
+/**
+ * @brief The vm's irq block, describing all the device interrupts
+ * global to the vm; it covers `VM_GLOBAL_VIRQ_NR - VM_LOCAL_VIRQ_NR` irqs.
+ */
+struct vm_virt_irq_block {
+	bool enabled;
+	bool irq_bitmap[VM_GLOBAL_VIRQ_NR];
+
+	/* interrupt control block flag */
+	uint32_t flags;
+	uint32_t irq_num;
+	uint32_t cpu_num;
+
+	uint32_t irq_target[VM_GLOBAL_VIRQ_NR];
+	uint32_t ipi_vcpu_source[CONFIG_MP_NUM_CPUS][VM_SGI_VIRQ_NR];
+
+	/* virtual irq block lock. */
+	struct k_spinlock vm_virq_lock;
+
+	/* descs for this vm */
+	struct virt_irq_desc vm_virt_irq_desc[VM_GLOBAL_VIRQ_NR-VM_LOCAL_VIRQ_NR];
+
+	/* binding to the interrupt controller */
+	void *virt_priv_date;
+};
+
+bool vcpu_irq_exist(struct z_vcpu *vcpu);
+
+int vcpu_wait_for_irq(struct z_vcpu *vcpu);
+
+/**
+ * @brief Init the irq desc when adding a vm_dev.
+ */
+void vm_device_irq_init(struct z_vm *vm, struct z_virt_dev *vm_dev);
+
+/**
+ * @brief Init the irq block for a vm.
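+ *
+ * A hedged ordering sketch (not mandated by this header): the block is
+ * set up once per vm before any device irq descs are registered:
+ * @code{.c}
+ * vm_irq_block_init(vm);
+ * vm_device_irq_init(vm, vm_dev);
+ * @endcode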
+ */
+int vm_irq_block_init(struct z_vm *vm);
+
+#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_VM_IRQ_H_ */
diff --git a/include/zephyr/zvm/vm_manager.h b/include/zephyr/zvm/vm_manager.h
new file mode 100644
index 00000000000000..72e09a44f129f7
--- /dev/null
+++ b/include/zephyr/zvm/vm_manager.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_VM_MANAGER_H_
+#define ZEPHYR_INCLUDE_VIRTUALIZATION_VM_MANAGER_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * @brief Init the vm struct.
+ * When creating a vm, we must load the vm image and parse it
+ * if it is a complicated system.
+ */
+typedef void (*vm_init_t)(struct z_vm *vm);
+typedef void (*vcpu_init_t)(struct z_vcpu *vcpu);
+typedef void (*vcpu_run_t)(struct z_vcpu *vcpu);
+typedef void (*vcpu_halt_t)(struct z_vcpu *vcpu);
+
+/**
+ * @brief VM mapping of virtual addresses to physical addresses.
+ */
+typedef int (*vm_mmap_t)(struct vm_mem_domain *vmem_domain);
+typedef void (*vmm_init_t)(struct z_vm *vm);
+typedef int (*vint_init_t)(struct z_vcpu *vcpu);
+typedef int (*vtimer_init_t)(struct z_vcpu *vcpu);
+
+/**
+ * @brief VM's vcpu ops functions.
+ */
+struct vm_ops {
+	vm_init_t vm_init;
+
+	vcpu_init_t vcpu_init;
+	vcpu_run_t vcpu_run;
+	vcpu_halt_t vcpu_halt;
+
+	vmm_init_t vmm_init;
+	vm_mmap_t vm_mmap;
+
+	/* @TODO maybe add load/restore funcs later */
+	vint_init_t vint_init;
+	vtimer_init_t vtimer_init;
+};
+
+int zvm_new_guest(size_t argc, char **argv);
+int zvm_run_guest(size_t argc, char **argv);
+int zvm_pause_guest(size_t argc, char **argv);
+int zvm_delete_guest(size_t argc, char **argv);
+int zvm_info_guest(size_t argc, char **argv);
+
+/**
+ * @brief Shut down a guest.
+ */
+void zvm_shutdown_guest(struct z_vm *vm);
+
+/**
+ * @brief Reset a guest.
+ */
+void zvm_reboot_guest(struct z_vm *vm);
+
+#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_VM_MANAGER_H_ */
diff --git a/include/zephyr/zvm/vm_mm.h b/include/zephyr/zvm/vm_mm.h
new file mode 100644
index 00000000000000..85a19e6d168ba1
--- /dev/null
+++ b/include/zephyr/zvm/vm_mm.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_ZVM_VM_MM_H_
+#define ZEPHYR_INCLUDE_ZVM_VM_MM_H_
+
+#include
+#include
+#include
+#include
+
+/* Mapping-type flags for a memory area */
+#define BLK_MAP 0x01000000
+#define PGE_MAP 0x02000000
+
+/* Memory block sizes: 4K (default), 64K (zephyr), 2M (linux) */
+#define DEFAULT_BLK_MEM_SHIFT (12)
+#define ZEPHYR_BLK_MEM_SHIFT (16)
+#define LINUX_BLK_MEM_SHIFT (21)
+
+#define DEFAULT_VM_BLOCK_SIZE (1UL << DEFAULT_BLK_MEM_SHIFT) /* 4K */
+#define ZEPHYR_VM_BLOCK_SIZE (1UL << ZEPHYR_BLK_MEM_SHIFT) /* 64K */
+#define LINUX_VM_BLOCK_SIZE (1UL << LINUX_BLK_MEM_SHIFT) /* 2M */
+
+/* Forward declaration to avoid a compiler warning. */
+struct z_vm;
+
+/**
+ * @brief vm_mem_block records the translation relation of a virt addr to a phy addr
+ */
+struct vm_mem_block {
+	uint8_t *phy_pointer;
+
+	/* block num of this vpart */
+	uint64_t cur_blk_offset;
+
+	uint64_t phy_base;
+	uint64_t virt_base;
+
+	/* block list of this vpart */
+	sys_dnode_t vblk_node;
+};
+
+/**
+ * @brief vm_mem_partition stores one of the VM task areas
+ */
+struct vm_mem_partition {
+
+	/* Virtual memory info for this vpart. */
+	struct k_mem_partition *vm_mm_partition;
+
+	/**
+	 * Stores the physical memory info for
+	 * this vpart.
diff --git a/include/zephyr/zvm/vm_mm.h b/include/zephyr/zvm/vm_mm.h
new file mode 100644
index 00000000000000..85a19e6d168ba1
--- /dev/null
+++ b/include/zephyr/zvm/vm_mm.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_ZVM_VM_MM_H_
+#define ZEPHYR_INCLUDE_ZVM_VM_MM_H_
+
+#include
+#include
+#include
+#include
+
+/* Mapping granularity flags: map as blocks or as pages. */
+#define BLK_MAP 0x01000000
+#define PGE_MAP 0x02000000
+
+/* Memory block shifts: default 4K, Zephyr VM 64K, Linux VM 2M */
+#define DEFAULT_BLK_MEM_SHIFT (12)
+#define ZEPHYR_BLK_MEM_SHIFT (16)
+#define LINUX_BLK_MEM_SHIFT (21)
+
+#define DEFAULT_VM_BLOCK_SIZE (1UL << DEFAULT_BLK_MEM_SHIFT) /* 4K */
+#define ZEPHYR_VM_BLOCK_SIZE (1UL << ZEPHYR_BLK_MEM_SHIFT) /* 64K */
+#define LINUX_VM_BLOCK_SIZE (1UL << LINUX_BLK_MEM_SHIFT) /* 2M */
+
+/* Forward declaration, avoids a warning caused by header ordering. */
+struct z_vm;
+
+/**
+ * @brief vm_mem_block records the virt-to-phys translation of one block.
+ */
+struct vm_mem_block {
+	uint8_t *phy_pointer;
+
+	/* block num of this vpart */
+	uint64_t cur_blk_offset;
+
+	uint64_t phy_base;
+	uint64_t virt_base;
+
+	/* block list of this vpart */
+	sys_dnode_t vblk_node;
+};
+
+/**
+ * @brief vm_mem_partition stores one memory area of the VM.
+ */
+struct vm_mem_partition {
+
+	/* Virtual memory info for this vpart. */
+	struct k_mem_partition *vm_mm_partition;
+
+	/**
+	 * Store the physical memory info for
+	 * this vpart. It is not the image's base
+	 * and size, but the physical memory allocated
+	 * to the vm.
+	 */
+	uint64_t part_hpa_base;
+	uint64_t part_hpa_size;
+
+	/* the vm_mem_partition node linked to the vm mm */
+	sys_dnode_t vpart_node;
+
+	/* mem_block lists for physical memory management */
+	sys_dlist_t blk_list;
+
+	/* this vm_mem_partition belongs to one vmem_domain */
+	struct vm_mem_domain *vmem_domain;
+
+};
+
+/**
+ * @brief vm_mem_domain describes the full virtual address space of the vm.
+ */
+struct vm_mem_domain {
+	bool is_init;
+
+	/* A vm is bound to a domain */
+	struct k_mem_domain *vm_mm_domain;
+	uint64_t pgd_addr;
+
+	/**
+	 * vm_mem_partition lists for mapped and idle partitions;
+	 * devices in use sit on the mapped list, the rest on the idle list.
+	 */
+	sys_dlist_t idle_vpart_list;
+	sys_dlist_t mapped_vpart_list;
+
+	struct k_spinlock spin_mmlock;
+	struct z_vm *vm;
+};
+
+/**
+ * @brief Map/unmap the virtual address range of 'vpart' to/from physical
+ * memory. These functions build/release the page tables that translate
+ * virtual addresses to physical addresses.
+ */
+int map_vpart_to_block(struct vm_mem_domain *vmem_domain, struct vm_mem_partition *vpart, uint64_t unit_msize);
+int unmap_vpart_to_block(struct vm_mem_domain *vmem_domain, struct vm_mem_partition *vpart);
+
+/**
+ * @brief Create the vdev memory partition.
+ */
+int vm_vdev_mem_create(struct vm_mem_domain *vmem_domain, uint64_t hpbase,
+			uint64_t ipbase, uint64_t size, uint32_t attrs);
+
+/**
+ * @brief Init the vm mm struct for this vm.
+ * This function initializes the vm_mm struct: it inits the vpart list,
+ * sets the initial virtual space for the vm, and calls the
+ * alloc_vm_mem_partition function to init each vpart struct and add it
+ * to the unused vpart list.
+ * 1. Set the total vm address space for this vm.
+ * 2. Add it to the initial vpart space.
+ *
+ * @param vm : vm struct that stores the vm_mm struct
+ */
+int vm_mem_domain_create(struct z_vm *vm);
+
+/**
+ * @brief Add a memory area to this vm's mm space.
+ * This function is not used only by the vm init code: call it whenever
+ * a memory area must be added to the vm. It allocates a memory area for
+ * the user, adds it to the used vpart list, and makes it ready for
+ * memory mapping.
+ *
+ * @param vmem_dm: the vm's mm space struct.
+ *
+ * @return int: error code.
+ */
+int vm_dynmem_apart_add(struct vm_mem_domain *vmem_dm);
+
+/* Add area partitions to the vm memory domain */
+int vm_mem_domain_partitions_add(struct vm_mem_domain *vmem_dm);
+
+/* Remove an area partition from the vm memory struct */
+int vm_mem_apart_remove(struct vm_mem_domain *vmem_dm);
+
+/**
+ * @brief Init the vm's domain.
+ */
+int arch_vm_mem_domain_init(struct k_mem_domain *domain, uint32_t vmid);
+
+/**
+ * @brief Translate a guest physical address to a host physical address.
+ */
+uint64_t vm_gpa_to_hpa(struct z_vm *vm, uint64_t gpa, struct vm_mem_partition *vpart);
+
+void vm_host_memory_read(uint64_t hpa, void *dst, size_t len);
+void vm_host_memory_write(uint64_t hpa, void *src, size_t len);
+
+void vm_guest_memory_read(struct z_vm *vm, uint64_t gpa, void *dst, size_t len);
+void vm_guest_memory_write(struct z_vm *vm, uint64_t gpa, void *src, size_t len);
+
+#endif /* ZEPHYR_INCLUDE_ZVM_VM_MM_H_ */
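A short sketch (not part of the patch) of the intended flow of the accessors above: use vm_gpa_to_hpa() when a host physical address is needed repeatedly, or the one-shot vm_guest_memory_* helpers for plain copies. The helper below is hypothetical:

#include <zephyr/zvm/vm_mm.h>

/* Patch a 32-bit word inside guest memory, addressed by guest PA. */
static void patch_guest_word(struct z_vm *vm, uint64_t gpa, uint32_t word)
{
	vm_guest_memory_write(vm, gpa, &word, sizeof(word));
}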
diff --git a/include/zephyr/zvm/zlog.h b/include/zephyr/zvm/zlog.h
new file mode 100644
index 00000000000000..8d552283246b36
--- /dev/null
+++ b/include/zephyr/zvm/zlog.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_ZLOG_H_
+#define ZEPHYR_INCLUDE_VIRTUALIZATION_ZLOG_H_
+
+#include
+
+#ifdef CONFIG_LOG
+
+#define ZVM_LOG_ERR(...) LOG_ERR(__VA_ARGS__)
+#define ZVM_LOG_WARN(...) LOG_WRN(__VA_ARGS__)
+#ifdef CONFIG_ZVM_DEBUG_LOG_INFO
+#define ZVM_LOG_INFO(...) LOG_PRINTK(__VA_ARGS__)
+#else
+#define ZVM_LOG_INFO(...)
+#endif
+
+#else
+/* No trailing semicolon after while (0), so the macros expand safely
+ * inside if/else bodies. */
+#define ZVM_LOG_ERR(format, ...) \
+do {\
+	DEBUG("\033[31m[ERR:]File:%s Line:%d. " format "\n\033[0m", __FILE__, \
+		__LINE__, ##__VA_ARGS__);\
+} while (0)
+#define ZVM_LOG_WARN(format, ...) \
+do {\
+	DEBUG("\033[33m[WRN:]File:%s Line:%d. " format "\n\033[0m", __FILE__, \
+		__LINE__, ##__VA_ARGS__);\
+} while (0)
+#ifdef CONFIG_ZVM_DEBUG_LOG_INFO
+#define ZVM_LOG_INFO(format, ...) \
+do {\
+	DEBUG("\033[34m[INFO:]File:%s Line:%d. " format "\n\033[0m", __FILE__, \
+		__LINE__, ##__VA_ARGS__);\
+} while (0)
+#else
+#define ZVM_LOG_INFO(...)
+#endif
+
+#endif
+
+#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_ZLOG_H_ */
\ No newline at end of file
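Typical use of the wrappers above inside a ZVM source file (os.c and the vgic code follow this pattern); the function below is a hypothetical sketch and assumes zvm.h, which defines ZVM_MODULE_NAME, is included first:

#include <zephyr/zvm/zvm.h>
#include <zephyr/zvm/zlog.h>

LOG_MODULE_DECLARE(ZVM_MODULE_NAME);

static void report_vm_create(int vmid, int err)
{
	if (err) {
		ZVM_LOG_ERR("Failed to create VM %d (err %d)", vmid, err);
		return;
	}
	/* Only printed when CONFIG_ZVM_DEBUG_LOG_INFO is enabled. */
	ZVM_LOG_INFO("Created VM %d\n", vmid);
}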
diff --git a/include/zephyr/zvm/zvm.h b/include/zephyr/zvm/zvm.h
new file mode 100644
index 00000000000000..60619f48957c20
--- /dev/null
+++ b/include/zephyr/zvm/zvm.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef ZEPHYR_INCLUDE_VIRTUALIZATION_H_
+#define ZEPHYR_INCLUDE_VIRTUALIZATION_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define ZVM_MODULE_NAME zvm_host
+#define SINGLE_CORE 1U
+#define DT_MB (1024 * 1024)
+
+/**
+ * @brief Spinlock initialization for smp
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_SPIN_VALIDATE)
+#define ZVM_SPINLOCK_INIT(dev) \
+({ \
+	struct k_spinlock *lock = (struct k_spinlock *)dev; \
+	lock->locked = 0x0; \
+	lock->thread_cpu = 0x0; \
+})
+#elif defined(CONFIG_SMP) && !defined(CONFIG_SPIN_VALIDATE)
+#define ZVM_SPINLOCK_INIT(dev) \
+({ \
+	struct k_spinlock *lock = (struct k_spinlock *)dev; \
+	lock->locked = 0x0; \
+})
+#else
+#define ZVM_SPINLOCK_INIT(dev)
+#endif
+
+struct z_os_info;
+
+extern struct z_kernel _kernel;
+extern struct zvm_manage_info *zvm_overall_info;
+
+/* ZVM's functions */
+typedef void (*zvm_new_vm_t)(size_t argc, char **argv);
+typedef void (*zvm_run_vm_t)(uint32_t vmid);
+typedef void (*zvm_update_vm_t)(size_t argc, char **argv);
+typedef void (*zvm_info_vm_t)(size_t argc, char **argv);
+typedef void (*zvm_pause_vm_t)(uint32_t vmid);
+typedef void (*zvm_halt_vm_t)(size_t argc, char **argv);
+typedef void (*zvm_delete_vm_t)(uint32_t vmid);
+
+
+/**
+ * @brief zvm_hwsys_info stores basic information about the ZVM.
+ *
+ * The hardware resources we currently track are the CPU and memory
+ * (named sram in the dts file), e.g. the CPU's compatible property and
+ * the memory size. Other devices are not tracked yet. This structure
+ * stores that basic hardware information.
+ */
+struct zvm_hwsys_info {
+	char *cpu_type;
+	uint16_t phy_cpu_num;
+	uint64_t phy_mem;
+	uint64_t phy_mem_used;
+};
+
+/**
+ * @brief General operations for virtual machines.
+ */
+struct zvm_ops {
+	zvm_new_vm_t new_vm;
+	zvm_run_vm_t run_vm;
+	zvm_update_vm_t update_vm;
+	zvm_info_vm_t info_vm;
+	zvm_pause_vm_t pause_vm;
+	zvm_halt_vm_t halt_vm;
+	zvm_delete_vm_t delete_vm;
+};
+
+/*
+ * @brief ZVM manage structure.
+ *
+ * As a hypervisor, zvm should know how much resource it can use and how
+ * many vms it carries.
+ * On the first aspect, file subsys/_zvm_zvm_host/zvm_host.c gets hardware
+ * info from the devicetree file. The corresponding data structure type
+ * "struct zvm_hwsys_info" stores it. "struct zvm_hwsys_info" includes:
+ * -> the total number of vms
+ * -> the number of physical CPUs
+ * -> the system's CPU typename
+ * -> the amount of physical memory
+ * -> how much physical memory has been used
+ * and so on.
+ * On the second aspect, we should know what amount of resources is proper
+ * for a vm. A suitable data structure, just like "vm_info_t", describes
+ * it. It includes information as below:
+ * -> ...
+ */
+struct zvm_manage_info {
+
+	/* The hardware information of this device */
+	struct zvm_hwsys_info *hw_info;
+
+	struct z_vm *vms[CONFIG_MAX_VM_NUM];
+
+	/* TODO: try to add a flag to describe the running and pending vm lists */
+
+	/** Each bit of this value represents a virtual machine id.
+	 * When the value of a bit is 1,
+	 * the id of that virtual machine has been allocated, and vice versa.
+	 */
+	uint32_t alloced_vmid;
+
+	/* total number of vms in the system */
+	uint32_t vm_total_num;
+	struct k_spinlock spin_zmi;
+};
+
+void zvm_ipi_handler(void);
+struct zvm_dev_lists* get_zvm_dev_lists(void);
+
+int load_os_image(struct z_vm *vm);
+
+static ALWAYS_INLINE bool is_vmid_full(void)
+{
+	return zvm_overall_info->alloced_vmid == BIT_MASK(CONFIG_MAX_VM_NUM);
+}
+
+static ALWAYS_INLINE uint32_t find_next_vmid(struct z_os_info *vm_info, uint32_t *vmid)
+{
+	uint32_t id, maxid = BIT(CONFIG_MAX_VM_NUM);
+
+	for (id = BIT(0), *vmid = 0; id < maxid; id <<= 1, (*vmid)++) {
+		if (!(id & zvm_overall_info->alloced_vmid)) {
+			zvm_overall_info->alloced_vmid |= id;
+			return 0;
+		}
+	}
+	return -EOVERFLOW;
+}
+
+/**
+ * @brief Allocate a unique vmid for this VM.
+ * TODO: needs an atomic op on the vmid.
+ */
+static ALWAYS_INLINE uint32_t allocate_vmid(struct z_os_info *vm_info) {
+	int err;
+	uint32_t res;
+	k_spinlock_key_t key;
+
+	if (unlikely(is_vmid_full())) {
+		return CONFIG_MAX_VM_NUM; /* Value overflow.
*/ + } + + key = k_spin_lock(&zvm_overall_info->spin_zmi); + err = find_next_vmid(vm_info, &res); + if (err) { + k_spin_unlock(&zvm_overall_info->spin_zmi, key); + return CONFIG_MAX_VM_NUM; + } + + zvm_overall_info->vm_total_num++; + + k_spin_unlock(&zvm_overall_info->spin_zmi, key); + + return res; +} + +static ALWAYS_INLINE struct z_vm *get_vm_by_id(uint32_t vmid) { + if (unlikely(vmid >= CONFIG_MAX_VM_NUM)){ + return NULL; + } + return zvm_overall_info->vms[vmid]; +} + +static uint32_t pcpu_list[CONFIG_MP_NUM_CPUS] = {0}; + +static ALWAYS_INLINE void set_all_cache_clean(){ + for(int i = 0; i < CONFIG_MP_NUM_CPUS; i++){ + pcpu_list[i] = 1; + } +} + +static ALWAYS_INLINE void set_cpu_cache_clean(int pcpu_id){ + pcpu_list[pcpu_id] = 1; +} + +static ALWAYS_INLINE void reset_cache_clean(int pcpu_id){ + pcpu_list[pcpu_id] = 0; +} + +static ALWAYS_INLINE uint32_t get_cache_clean(int pcpu_id){ + return pcpu_list[pcpu_id]; +} + +void set_all_pcpu_cache_clean(void); + +int get_pcpu_cache_clean(uint64_t cpu_mpidr); + +void reset_pcpu_cache_clean(uint64_t cpu_mpidr); + +#endif /* ZEPHYR_INCLUDE_VIRTUALIZATION_H_ */ diff --git a/kernel/include/kernel_offsets.h b/kernel/include/kernel_offsets.h index 3cadf7e12a0bc7..b2df69955cdca6 100644 --- a/kernel/include/kernel_offsets.h +++ b/kernel/include/kernel_offsets.h @@ -70,6 +70,10 @@ GEN_OFFSET_SYM(_thread_t, stack_info); GEN_OFFSET_SYM(_thread_t, tls); #endif /* CONFIG_THREAD_LOCAL_STORAGE */ +#ifdef CONFIG_ZVM +GEN_OFFSET_SYM(_thread_t, vcpu_struct); +#endif + GEN_ABSOLUTE_SYM(__z_interrupt_stack_SIZEOF, sizeof(z_interrupt_stacks[0])); GEN_ABSOLUTE_SYM(__z_interrupt_all_stacks_SIZEOF, sizeof(z_interrupt_stacks)); diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h index 7d5a880a229cce..7b1d45c84b55a3 100644 --- a/kernel/include/ksched.h +++ b/kernel/include/ksched.h @@ -67,6 +67,8 @@ struct k_thread *z_swap_next_thread(void); void z_thread_abort(struct k_thread *thread); void move_thread_to_end_of_prio_q(struct k_thread *thread); bool thread_is_sliceable(struct k_thread *thread); +void dequeue_ready_thread(struct k_thread *thread); +bool is_thread_active_elsewhere(struct k_thread *thread); static inline void z_reschedule_unlocked(void) { diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h index d3638b6179a57b..e2cb47088417e7 100644 --- a/kernel/include/kswap.h +++ b/kernel/include/kswap.h @@ -260,6 +260,10 @@ static inline void z_dummy_thread_init(struct k_thread *dummy_thread) dummy_thread->base.slice_ticks = 0; #endif /* CONFIG_TIMESLICE_PER_THREAD */ +#ifdef CONFIG_ZVM + dummy_thread->vcpu_struct = NULL; +#endif + _current_cpu->current = dummy_thread; } #endif /* ZEPHYR_KERNEL_INCLUDE_KSWAP_H_ */ diff --git a/kernel/init.c b/kernel/init.c index 8b7d62b867118e..94a14952ec8ba3 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -681,6 +681,10 @@ FUNC_NORETURN void z_cstart(void) timing_start(); #endif /* CONFIG_TIMING_FUNCTIONS_NEED_AT_BOOT */ +#ifdef CONFIG_ZVM + _current->vcpu_struct = NULL; +#endif + #ifdef CONFIG_MULTITHREADING switch_to_main_thread(prepare_multithreading()); #else diff --git a/kernel/ipi.c b/kernel/ipi.c index ee01c4594251ca..d40f838ecd2e5b 100644 --- a/kernel/ipi.c +++ b/kernel/ipi.c @@ -7,6 +7,9 @@ #include #include #include +#ifdef CONFIG_ZVM +#include +#endif #ifdef CONFIG_TRACE_SCHED_IPI extern void z_trace_sched_ipi(void); @@ -100,6 +103,10 @@ void z_sched_ipi(void) z_trace_sched_ipi(); #endif /* CONFIG_TRACE_SCHED_IPI */ +#ifdef CONFIG_ZVM + zvm_ipi_handler(); +#endif /* CONFIG_ZVM */ + #ifdef 
CONFIG_TIMESLICING
	if (thread_is_sliceable(_current)) {
		z_time_slice();
diff --git a/kernel/sched.c b/kernel/sched.c
index 2827a41fd381bd..b303f3fad62f52 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -21,6 +21,9 @@
 #include
 #include
 #include
+#ifdef CONFIG_ZVM
+#include
+#endif
 
 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
 
@@ -926,8 +929,17 @@ void *z_get_next_switch_handle(void *interrupted)
 		if (IS_ENABLED(CONFIG_SMP)) {
 			old_thread->switch_handle = NULL;
 		}
-		new_thread = next_up();
+#ifdef CONFIG_ZVM
+		if (old_thread->base.thread_state & _THREAD_VCPU_NO_SWITCH) {
+			old_thread->base.thread_state &= ~_THREAD_VCPU_NO_SWITCH;
+			new_thread = old_thread;
+		} else {
+			new_thread = next_up();
+		}
+#else
+		new_thread = next_up();
+#endif
 		z_sched_usage_switch(new_thread);
 
 		if (old_thread != new_thread) {
@@ -970,6 +982,11 @@
 #endif
 				runq_add(old_thread);
 			}
+#ifdef CONFIG_ZVM
+			if (vcpu_need_switch(new_thread, old_thread)) {
+				do_vcpu_swap(new_thread, old_thread);
+			}
+#endif /* CONFIG_ZVM */
 		}
 		old_thread->switch_handle = interrupted;
 		ret = new_thread->switch_handle;
@@ -981,6 +998,11 @@
 	signal_pending_ipi();
 	return ret;
 #else
+#ifdef CONFIG_ZVM
+	if (vcpu_need_switch(_kernel.ready_q.cache, _current)) {
+		do_vcpu_swap(_kernel.ready_q.cache, _current);
+	}
+#endif /* CONFIG_ZVM */
 	z_sched_usage_switch(_kernel.ready_q.cache);
 	_current->switch_handle = interrupted;
 	set_current(_kernel.ready_q.cache);
@@ -1600,3 +1622,20 @@ int z_sched_waitq_walk(_wait_q_t *wait_q,
 
 	return status;
 }
+
+bool is_thread_active_elsewhere(struct k_thread *thread)
+{
+	bool ret = false;
+	K_SPINLOCK(&_sched_spinlock) {
+		if (thread_active_elsewhere(thread) != NULL) {
+			ret = true;
+		}
+	}
+	return ret;
+}
+
+void dequeue_ready_thread(struct k_thread *thread)
+{
+	unready_thread(thread);
+	signal_pending_ipi();
+}
diff --git a/samples/subsys/zvm/CMakeLists.txt b/samples/subsys/zvm/CMakeLists.txt
new file mode 100644
index 00000000000000..4590836b712c24
--- /dev/null
+++ b/samples/subsys/zvm/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+# Copyright 2024-2025 openEuler SIG-Zephyr
+# SPDX-License-Identifier: Apache-2.0
+
+# Add env for building zvm
+set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+
+cmake_minimum_required(VERSION 3.20.0)
+find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
+project(zvm)
+
+target_sources(app PRIVATE src/main.c)
diff --git a/samples/subsys/zvm/README.rst b/samples/subsys/zvm/README.rst
new file mode 100644
index 00000000000000..7935bab03a43ba
--- /dev/null
+++ b/samples/subsys/zvm/README.rst
@@ -0,0 +1,146 @@
+.. zephyr:code-sample:: zvm
+   :name: ZVM (Zephyr-based Virtual Machine)
+
+   Boot virtual machines (VMs) with ZVM.
+   This sample supports running two VMs
+   with ZVM on the QEMU MAX board.
+
+Overview
+*********************************
+
+ZVM (Zephyr-based Virtual Machine) is a new generation of Type 1.5 embedded
+RTOS (real-time operating system) virtualization solution, jointly developed
+with the open-source RTOS Zephyr. ZVM can deploy multiple operating systems
+in a secure and isolated manner on a single piece of hardware, providing
+real-time and flexible virtualization support for multi-OS and multi-tasking.
+Type 1.5 is not a compromise between low latency (Type 1) and high flexibility (Type 2), +but achieves the best of both worlds without sacrificing either: + +- ZVM does not run on Zephyr, but directly on the hardware, sharing the driver support and scheduling capabilities of the Zephyr kernel (i.e. Type 1.5 is more flexible than Type 1); meanwhile, ZVM avoids the latency overhead caused by multiple layers of dependency in Type 2 (i.e. Type 1.5 has lower latency than Type 2). + +- By combining the real-time scheduling mechanism of Zephyr RTOS and the task isolation mechanism of ZVM, real-time tasks will not be interfered with by low-priority tasks (i.e. Type 1.5 has lower latency than Type 1 and Type 2). + +Building and Running +********************************* + +Building ZVM for ARMv8.1+ Cores boards +====================================== + +ZVM requires support for ARMv8.1+ cores, such as Cortex-A55 and A76 processors. The sample can be built as follows: + +.. zephyr-app-commands:: + :zephyr-app: samples/subsys/zvm + :board: qemu_max + :goals: build + :compact: + +For other ARMv8.1+ compatible boards, you need to add the corresponding overlay files in the samples/subsys/zvm/boards directory. + +you can build the ZVM with the following command: + +.. code-block:: shell + + west build -b qemu_max/qemu_max/smp samples/subsys/zvm/ + +Running VM with ZVM +==================================== + +1. Get the VM image files +-------------------------------------- + +We provide pre-built images that can be executed directly on the platform. +Use the following method to pull the pre-built images. First, enter the home directory: + +.. code-block:: shell + + cd ~ + git clone https://github.com/hnu-esnl/zvm_images.git + +Then copy the zvm_host.elf images from the Zephyr repository to ~/zvm_images: + +.. code-block:: shell + + cp Zephyr_DIR/build/zephyr/zvm_host.elf ~/zvm_images/qemu_arm64 + +2. Boot ZVM with pre-written script file +-------------------------------------------- + +Use auto_zvm.sh to run the ZVM: + +.. code-block:: shell + + cd ~/zvm_images/qemu_arm64/ + ./auto_zvm.sh debugserver qemu_max_smp + +The following output is printed and you can use commands to create and run the VMs: + +Sample output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: shell + + *** Booting Zephyr OS build 9f824289b28d *** + Secondary CPU core 1 (MPID:0x1) is up + Secondary CPU core 2 (MPID:0x2) is up + Secondary CPU core 3 (MPID:0x3) is up + + █████████╗ ██╗ ██╗ ███╗ ███╗ + ╚════███╔╝ ██║ ██║ ████╗ ████║ + ███╔╝ ╚██╗ ██╔╝ ██╔ ████╔██║ + ██╔╝ ╚██ ██╔╝ ██║ ╚██╔╝██║ + █████████╗ ╚████╔╝ ██║ ╚═╝ ██║ + ╚════════╝ ╚═╝ ╚═╝ ╚═╝ + + zvm_host:~# + +3. Launching and Connecting to the Corresponding VM: +------------------------------------------------------------ + +In the ZVM window, enter the following command to view the supported commands on the platform: + +.. code-block:: shell + + zvm help + +Launching Zephyr Virtual Machine +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1) Create a Zephyr VM: + +.. code-block:: shell + + zvm new -t zephyr + +2) Run the Zephyr VM: + +.. code-block:: shell + + zvm run -n 0 + +(-n followed by the VM's corresponding ID, assuming the created VM's VM-ID is 0) + +3) Enter the Zephyr VM UART console: + +.. code-block:: shell + + zvm look 0 + +4) Exit the Zephyr VM: + +Enter the following command in the console: + + Ctrl + x + +.. note:: + + The ZVM project is created and led by Professor Guoqi Xie at Hunan University, China. 
+ We would like to express our gratitude to the collaborators for their contributions + to this project. The main developers are as follows: + + - Guoqi Xie, email: xgqman@hnu.edu.cn + - Chenglai Xiong (openEuler sig-Zephyr Maintainer), email: xiongcl@hnu.edu.cn + - Wei Ren (openEuler sig-Zephyr Maintainer), email: dfrd-renw@dfmc.com.cn + - Xingyu Hu, email: huxingyu@hnu.edu.cn + - Yuhao Hu, email: ahui@hun.edu.cn + + For more information, see the `ZVM Main page `__. diff --git a/samples/subsys/zvm/boards/qemu_max_smp.conf b/samples/subsys/zvm/boards/qemu_max_smp.conf new file mode 100644 index 00000000000000..44392aa5b26fed --- /dev/null +++ b/samples/subsys/zvm/boards/qemu_max_smp.conf @@ -0,0 +1,10 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +# hypervisor need 1024M of virtual space. +CONFIG_KERNEL_VM_SIZE=0x80000000 + +# virtual device. +CONFIG_VM_VGICV3=y +CONFIG_VM_VSERIAL=y diff --git a/samples/subsys/zvm/boards/qemu_max_smp.overlay b/samples/subsys/zvm/boards/qemu_max_smp.overlay new file mode 100644 index 00000000000000..fb4ebf9d5f0590 --- /dev/null +++ b/samples/subsys/zvm/boards/qemu_max_smp.overlay @@ -0,0 +1,98 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/{ + aliases { + zephyrcpy = "/soc/zephyr_cpy@e0000000"; + linuxcpy = "/soc/linux_cpy@e4000000"; + linuxdtb = "/soc/linux_dtb@ec000000"; + linuxrfs = "/soc/linux_rootfs@f0000000"; + }; + + chosen { + vm,console = &uart0; + }; + + soc { + zephyr_cpy@e0000000 { + reg = <0x0 0xe0000000 0x0 DT_SIZE_M(4)>; + }; + + linux_cpy@e4000000 { + reg = <0x0 0xe4000000 0x0 DT_SIZE_M(64)>; + }; + + linux_dtb@ec000000 { + reg = <0x0 0xec000000 0x0 DT_SIZE_M(1)>; + }; + + linux_rootfs@f0000000 { + reg = <0x0 0xf0000000 0x0 DT_SIZE_M(200)>; + }; + + pass_through_device { + + #address-cells = <2>; + #size-cells = <2>; + ranges; + /* using pt device for vm */ + /* + uart1: uart@9001000 { + compatible = "pl011"; + reg = <0x00 0x09001000 0x00 0x1000>; + interrupts = ; + interrupt-names = "irq_0"; + clocks = <&uartclk>; + status = "okay"; + current-speed = <115200>; + label = "UART_1"; + }; + */ + }; + + }; + + vm_zephyr_space { + #address-cells = <2>; + #size-cells = <2>; + compatible = "zephyr-vm"; + dtb_address = <0x00000000>; + dtb_size = ; + vcpu_num = <0x01>; + + zephyr_ddr: memory@80000000 { + compatible = "vm-dram"; + memmap_type = "direct"; + address_type = "normal_memory"; + vm_reg_base = <0x40000000>; + vm_reg_size = ; + reg = <0x0 0x80000000 0x0 DT_SIZE_M(4)>; + label = "VM0_MEM"; + }; + }; + + vm_linux_space { + #address-cells = <2>; + #size-cells = <2>; + compatible = "linux-vm"; + dtb_address = <0xc0000000>; + dtb_size = ; + vcpu_num = <0x01>; + rootfs_address = <0x99000000>; + + linux_ddr: memory@90000000 { + compatible = "vm-dram"; + memmap_type = "direct"; + address_type = "normal_memory"; + vm_reg_base = <0x40000000>; + vm_reg_size = ; + reg = <0x0 0x90000000 0x0 DT_SIZE_M(64)>; + label = "VM1_MEM"; + }; + + }; +}; diff --git a/samples/subsys/zvm/dts/bindings/linux-vm.yaml b/samples/subsys/zvm/dts/bindings/linux-vm.yaml new file mode 100644 index 00000000000000..9e520000e1cd86 --- /dev/null +++ b/samples/subsys/zvm/dts/bindings/linux-vm.yaml @@ -0,0 +1,28 @@ +description: linux virtual machine space + +compatible: "linux-vm" + +properties: + dtb_address: + type: int + 
description: | + Base address of the VM device tree file. + required: true + + dtb_size: + type: int + description: | + Size of the VM device tree file. + required: true + + vcpu_num: + type: int + description: | + Number of virtual CPUs for each Linux VM (supports SMP). + required: true + + rootfs_address: + type: int + description: | + Base address of the VM root filesystem. + required: true diff --git a/samples/subsys/zvm/dts/bindings/vm-dram.yaml b/samples/subsys/zvm/dts/bindings/vm-dram.yaml new file mode 100644 index 00000000000000..7dd5e77213d46d --- /dev/null +++ b/samples/subsys/zvm/dts/bindings/vm-dram.yaml @@ -0,0 +1,40 @@ +description: Virtual Machine DRAM + +compatible: "vm-dram" + +properties: + reg: + type: array + required: true + description: | + Memory regions for the VM. + + label: + type: string + required: true + description: | + Human-readable name for the VM DRAM. + + memmap_type: + type: string + required: true + description: | + Type of memory mapping for the VM domain. + + vm_reg_base: + type: int + required: true + description: | + Guest VM's physical memory base, equivalent to the common `memory reg base` in the DTS. + + vm_reg_size: + type: int + required: true + description: | + Guest VM's physical memory size, equivalent to the common `memory reg size` in the DTS. + + address_type: + type: string + required: true + description: | + Type of virtual address for the VM. diff --git a/samples/subsys/zvm/dts/bindings/zephyr-vm.yaml b/samples/subsys/zvm/dts/bindings/zephyr-vm.yaml new file mode 100644 index 00000000000000..8dccb33d442f05 --- /dev/null +++ b/samples/subsys/zvm/dts/bindings/zephyr-vm.yaml @@ -0,0 +1,22 @@ +description: Zephyr Virtual Machine Space + +compatible: "zephyr-vm" + +properties: + dtb_address: + type: int + description: | + Base address of the VM device tree file. + required: true + + dtb_size: + type: int + description: | + Size of the VM device tree file. + required: true + + vcpu_num: + type: int + description: | + Number of virtual CPUs for each Zephyr VM (supports SMP). + required: true diff --git a/samples/subsys/zvm/prj.conf b/samples/subsys/zvm/prj.conf new file mode 100644 index 00000000000000..e7f405cb8864c6 --- /dev/null +++ b/samples/subsys/zvm/prj.conf @@ -0,0 +1,53 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +# ZVM +CONFIG_ZVM=y +CONFIG_ZVM_DEBUG_LOG_INFO=y +CONFIG_MAX_VM_NUM=2 +CONFIG_MAX_VCPU_PER_VM=2 +CONFIG_VM_DTB_FILE_INPUT=y +CONFIG_KERNEL_BIN_NAME="zvm_host" + +CONFIG_CONSOLE=y +CONFIG_CONSOLE_SUBSYS=y +CONFIG_CONSOLE_GETLINE=y +CONFIG_UART_CONSOLE=y +CONFIG_SHELL=y +CONFIG_SHELL_GETOPT=y +CONFIG_SHELL_THREAD_PRIORITY_OVERRIDE=y +CONFIG_SHELL_THREAD_PRIORITY=4 +CONFIG_SHELL_PROMPT_UART="zvm_host:~#" +CONFIG_LOG=y +CONFIG_LOG_MODE_MINIMAL=y +CONFIG_ASSERT=y + +CONFIG_MINIMAL_LIBC=y +CONFIG_MINIMAL_LIBC_RAND=y +CONFIG_COMMON_LIBC_MALLOC_ARENA_SIZE=4096 +CONFIG_GETOPT_LONG=y +CONFIG_POSIX_C_LIB_EXT=y + +CONFIG_PRINTK_BUFFER_SIZE=128 +CONFIG_SHELL_CMD_BUFF_SIZE=4096 +CONFIG_SHELL_PRINTF_BUFF_SIZE=128 +CONFIG_ISR_STACK_SIZE=8192 +CONFIG_SHELL_STACK_SIZE=8192 +CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=8192 +CONFIG_PRIVILEGED_STACK_SIZE=8192 +# 64MB heap size. 
+CONFIG_HEAP_MEM_POOL_SIZE=67108864 + +CONFIG_THREAD_NAME=y +CONFIG_THREAD_STACK_INFO=y +CONFIG_SCHED_CPU_MASK=y +CONFIG_SCHED_CPU_MASK_PIN_ONLY=y +CONFIG_NUM_PREEMPT_PRIORITIES=15 +CONFIG_TIMING_FUNCTIONS=y +CONFIG_SCHED_DEADLINE=y +CONFIG_NO_OPTIMIZATIONS=y +CONFIG_DYNAMIC_INTERRUPTS=y + +CONFIG_MAX_XLAT_TABLES=8192 +CONFIG_MAX_DOMAIN_PARTITIONS=16 diff --git a/samples/subsys/zvm/src/main.c b/samples/subsys/zvm/src/main.c new file mode 100644 index 00000000000000..af49bb74e22f06 --- /dev/null +++ b/samples/subsys/zvm/src/main.c @@ -0,0 +1,28 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +static void zvm_print_log(void) +{ + printk("\n"); + printk("\n"); + printk("█████████╗ ██╗ ██╗ ███╗ ███╗ \n"); + printk("╚════███╔╝ ██║ ██║ ████╗ ████║ \n"); + printk(" ███╔╝ ╚██╗ ██╔╝ ██╔ ████╔██║ \n"); + printk(" ██╔╝ ╚██ ██╔╝ ██║ ╚██╔╝██║ \n"); + printk("█████████╗ ╚████╔╝ ██║ ╚═╝ ██║ \n"); + printk("╚════════╝ ╚═╝ ╚═╝ ╚═╝ \n"); + +} + +int main(int argc, char **argv) +{ + zvm_print_log(); + return 0; +} diff --git a/soc/arm/qemu_max/CMakeLists.txt b/soc/arm/qemu_max/CMakeLists.txt new file mode 100644 index 00000000000000..3c1abe4bc7a1ee --- /dev/null +++ b/soc/arm/qemu_max/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +zephyr_library_sources_ifdef(CONFIG_ARM_MMU mmu_regions.c) + +zephyr_include_directories(.) + +set(SOC_LINKER_SCRIPT ${ZEPHYR_BASE}/include/zephyr/arch/arm64/scripts/linker.ld CACHE INTERNAL "") diff --git a/soc/arm/qemu_max/Kconfig b/soc/arm/qemu_max/Kconfig new file mode 100644 index 00000000000000..5a74a401ffab04 --- /dev/null +++ b/soc/arm/qemu_max/Kconfig @@ -0,0 +1,9 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +config SOC_QEMU_MAX + select ARM64 + select CPU_CORTEX_A55 + select QEMU_TARGET + select GIC_V3 diff --git a/soc/arm/qemu_max/Kconfig.defconfig b/soc/arm/qemu_max/Kconfig.defconfig new file mode 100644 index 00000000000000..ec087bb7790aa1 --- /dev/null +++ b/soc/arm/qemu_max/Kconfig.defconfig @@ -0,0 +1,24 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +if SOC_QEMU_MAX + +config NUM_IRQS + # must be >= the highest interrupt number used + # - include the UART interrupts + default 256 + +config SYS_CLOCK_HW_CYCLES_PER_SEC + default 62500000 + +# Workaround for not being able to have commas in macro arguments +DT_CHOSEN_Z_FLASH := zephyr,flash + +config FLASH_SIZE + default $(dt_chosen_reg_size_int,$(DT_CHOSEN_Z_FLASH),0,K) + +config FLASH_BASE_ADDRESS + default $(dt_chosen_reg_addr_hex,$(DT_CHOSEN_Z_FLASH)) + +endif # SOC_QEMU_MAX diff --git a/soc/arm/qemu_max/Kconfig.soc b/soc/arm/qemu_max/Kconfig.soc new file mode 100644 index 00000000000000..44af0f097709d0 --- /dev/null +++ b/soc/arm/qemu_max/Kconfig.soc @@ -0,0 +1,10 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +config SOC_QEMU_MAX + bool + select SOC_FAMILY_ARM64 + +config SOC + default "qemu_max" if SOC_QEMU_MAX diff --git a/soc/arm/qemu_max/mmu_regions.c b/soc/arm/qemu_max/mmu_regions.c new file mode 100644 index 00000000000000..e809d0b8a72673 --- /dev/null +++ b/soc/arm/qemu_max/mmu_regions.c @@ -0,0 +1,34 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +static const struct arm_mmu_region mmu_regions[] = { + + MMU_REGION_FLAT_ENTRY("GIC", + DT_REG_ADDR_BY_IDX(DT_INST(0, arm_gic), 0), + DT_REG_SIZE_BY_IDX(DT_INST(0, arm_gic), 0), + MT_DEVICE_nGnRnE | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE), + + MMU_REGION_FLAT_ENTRY("GIC", + DT_REG_ADDR_BY_IDX(DT_INST(0, arm_gic), 1), + DT_REG_SIZE_BY_IDX(DT_INST(0, arm_gic), 1), + MT_DEVICE_nGnRnE | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE), + + MMU_REGION_FLAT_ENTRY("UART0", + DT_REG_ADDR(DT_INST(0, arm_pl011)), + DT_REG_SIZE(DT_INST(0, arm_pl011)), + MT_DEVICE_nGnRnE | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE), + +}; + +const struct arm_mmu_config mmu_config = { + .num_regions = ARRAY_SIZE(mmu_regions), + .mmu_regions = mmu_regions, +}; diff --git a/soc/arm/soc.yml b/soc/arm/soc.yml index c0b2456501e0cd..d9e43eeaee5fad 100644 --- a/soc/arm/soc.yml +++ b/soc/arm/soc.yml @@ -30,4 +30,5 @@ family: - name: fvp_aemv8r_aarch32 socs: - name: qemu_cortex_a53 + - name: qemu_max - name: qemu_virt_arm64 diff --git a/subsys/CMakeLists.txt b/subsys/CMakeLists.txt index 584559fa50e672..23220109cae076 100644 --- a/subsys/CMakeLists.txt +++ b/subsys/CMakeLists.txt @@ -32,6 +32,7 @@ add_subdirectory(task_wdt) add_subdirectory(testsuite) add_subdirectory(tracing) add_subdirectory(usb) +add_subdirectory(zvm) add_subdirectory_ifdef(CONFIG_ARM_SIP_SVC_SUBSYS sip_svc) add_subdirectory_ifdef(CONFIG_BINDESC bindesc) diff --git a/subsys/Kconfig b/subsys/Kconfig index 2c708e26d59ec2..72e7e5d60d1c2e 100644 --- a/subsys/Kconfig +++ b/subsys/Kconfig @@ -51,6 +51,7 @@ source "subsys/usb/device/Kconfig" source "subsys/usb/device_next/Kconfig" source "subsys/usb/host/Kconfig" source "subsys/usb/usb_c/Kconfig" +source "subsys/zvm/Kconfig" source "subsys/zbus/Kconfig" # zephyr-keep-sorted-stop diff --git a/subsys/zvm/CMakeLists.txt b/subsys/zvm/CMakeLists.txt new file mode 100644 index 00000000000000..2fa0319c16334a --- /dev/null +++ b/subsys/zvm/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +add_subdirectory_ifdef(CONFIG_ZVM vdev) + +zephyr_sources_ifdef( + CONFIG_ZVM + vm_cpu.c + vm_device.c + vm_irq.c + vm_manager.c + vm_mm.c + vm.c + os.c + zvm.c + zvm_shell.c +) diff --git a/subsys/zvm/Kconfig b/subsys/zvm/Kconfig new file mode 100644 index 00000000000000..28c5447eac9158 --- /dev/null +++ b/subsys/zvm/Kconfig @@ -0,0 +1,46 @@ +# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. +# Copyright 2024-2025 openEuler SIG-Zephyr +# SPDX-License-Identifier: Apache-2.0 + +source "subsys/zvm/vdev/Kconfig" + +menuconfig ZVM + bool "Zephyr Based Virtual Machine" + default n + select USERSPACE + help + Enable Zephyr virtualization support. 
+ +if ZVM + +config ZVM_INIT_PRIORITY + int "ZVM Initialization Priority" + default 80 + help + Set the initialization priority for the ZVM module. + +config MAX_VM_NUM + int "Maximum Number of Simultaneous VMs" + range 0 32 + default 2 + help + Define the maximum number of VMs that can run simultaneously in the ZVM hypervisor. + +config MAX_VCPU_PER_VM + int "Maximum Number of vCPUs per VM" + range 1 4 + default 1 + help + Specify the maximum number of vCPUs that each VM can possess. + +config ZVM_DEBUG_LOG_INFO + bool "Enable System Boot Info Logging" + help + Enable logging of system boot information for debugging purposes. + +config VM_DTB_FILE_INPUT + bool "Parse DTB File for Linux VM" + help + Enable parsing of the Device Tree Blob (DTB) file for Linux VMs. + +endif # ZVM diff --git a/subsys/zvm/os.c b/subsys/zvm/os.c new file mode 100644 index 00000000000000..297f6c5c3d231d --- /dev/null +++ b/subsys/zvm/os.c @@ -0,0 +1,175 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +#define MB_SIZE (1024 * 1024) + +/** + * Template of guest os, now for linux and zephyr. +*/ +static struct z_os_info z_overall_vm_infos[] = { + { + .os_type = OS_TYPE_ZEPHYR, + .vcpu_num = ZEPHYR_VM_VCPU_NUM, + .vm_mem_base = ZEPHYR_VM_MEMORY_BASE, + .vm_mem_size = ZEPHYR_VM_MEMORY_SIZE, + .vm_image_base = ZEPHYR_IMAGE_BASE, + .vm_image_size = ZEPHYR_IMAGE_SIZE, + .vm_load_base = ZEPHYR_VM_LOAD_BASE, + .entry_point = ZEPHYR_VM_MEMORY_BASE, + }, + { + .os_type = OS_TYPE_LINUX, + .vcpu_num = LINUX_VM_VCPU_NUM, + .vm_mem_base = LINUX_VM_MEMORY_BASE, + .vm_mem_size = LINUX_VM_MEMORY_SIZE, + .vm_image_base = LINUX_IMAGE_BASE, + .vm_image_size = LINUX_IMAGE_SIZE, + .vm_load_base = LINUX_VM_LOAD_BASE, + .entry_point = LINUX_VM_MEMORY_BASE, + }, + +}; + +int get_os_info_by_type(struct z_os_info *vm_info) +{ + struct getopt_state *state = getopt_state_get(); + char *vm_type = state->optarg; + int ret = 0; + struct z_os_info tmp_vm_info; + + if (strcmp(vm_type, "zephyr") == 0){ + tmp_vm_info = z_overall_vm_infos[OS_TYPE_ZEPHYR]; + goto out; + } + + if (strcmp(vm_type, "linux") == 0){ + tmp_vm_info = z_overall_vm_infos[OS_TYPE_LINUX]; + goto out; + } + + ZVM_LOG_WARN("The VM type is not supported(Linux or zephyr). \n Please try again! 
\n"); + return -EINVAL; + +out: + vm_info->vcpu_num = tmp_vm_info.vcpu_num; + vm_info->vm_image_base = tmp_vm_info.vm_image_base; + vm_info->vm_image_size = tmp_vm_info.vm_image_size; + vm_info->vm_mem_base = tmp_vm_info.vm_mem_base; + vm_info->vm_mem_size = tmp_vm_info.vm_mem_size; + vm_info->os_type = tmp_vm_info.os_type; + vm_info->vm_load_base = tmp_vm_info.vm_load_base; + vm_info->entry_point = tmp_vm_info.entry_point; + + return ret; +} + +int load_vm_image(struct vm_mem_domain *vmem_domain, struct z_os *os) +{ + int ret = 0; + uint64_t *src_hva, des_hva; + uint64_t num_m = os->info.vm_image_size / MB_SIZE; + uint64_t src_hpa = os->info.vm_image_base; + uint64_t des_hpa = os->info.vm_load_base; + uint64_t per_size = MB_SIZE; + + ZVM_LOG_INFO("OS Image Loading ...\n"); + ZVM_LOG_INFO("Image_size = %lld MB\n", num_m); + ZVM_LOG_INFO("Image_src_hpa = 0x%llx \n", src_hpa); + ZVM_LOG_INFO("Image_des_hpa = 0x%llx \n", des_hpa); + while(num_m) { + k_mem_map_phys_bare((uint8_t **)&src_hva, (uintptr_t)src_hpa, per_size, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + k_mem_map_phys_bare((uint8_t **)&des_hva, (uintptr_t)des_hpa, per_size, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + memcpy((void *)des_hva, src_hva, per_size); + k_mem_unmap_phys_bare((uint8_t *)src_hva, per_size); + k_mem_unmap_phys_bare((uint8_t *)des_hva, per_size); + des_hpa += per_size; + src_hpa += per_size; + num_m--; + } + + if (os->info.os_type != OS_TYPE_LINUX){ + ZVM_LOG_INFO("OS Image Loaded, No need other file!\n"); + return ret; + } + + num_m = LINUX_VMDTB_SIZE / MB_SIZE; + src_hpa = LINUX_VMDTB_BASE; + des_hpa = LINUX_DTB_MEM_BASE; + ZVM_LOG_INFO("DTB Image Loading ...\n"); + ZVM_LOG_INFO("DTB_size = %lld MB\n", num_m); + ZVM_LOG_INFO("DTB_src_hpa = 0x%llx\n", src_hpa); + ZVM_LOG_INFO("DTB_des_hpa = 0x%llx\n", des_hpa); + while(num_m) { + k_mem_map_phys_bare((uint8_t **)&src_hva, (uintptr_t)src_hpa, per_size, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + k_mem_map_phys_bare((uint8_t **)&des_hva, (uintptr_t)des_hpa, per_size, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + memcpy((void *)des_hva, src_hva, per_size); + k_mem_unmap_phys_bare((uint8_t *)src_hva, per_size); + k_mem_unmap_phys_bare((uint8_t *)des_hva, per_size); + des_hpa += per_size; + src_hpa += per_size; + num_m--; + } + ZVM_LOG_INFO("Linux DTB Image Loaded !\n"); + + num_m = LINUX_VMRFS_SIZE / MB_SIZE; + src_hpa = LINUX_VMRFS_BASE; + des_hpa = LINUX_VMRFS_PHY_BASE; + ZVM_LOG_INFO("FS Image Loading ...\n"); + ZVM_LOG_INFO("FS_size = %lld MB\n", num_m); + ZVM_LOG_INFO("FS_src_hpa = 0x%llx\n", src_hpa); + ZVM_LOG_INFO("FS_des_hpa = 0x%llx\n", des_hpa); + while(num_m) { + k_mem_map_phys_bare((uint8_t **)&src_hva, (uintptr_t)src_hpa, per_size, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + k_mem_map_phys_bare((uint8_t **)&des_hva, (uintptr_t)des_hpa, per_size, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + memcpy((void *)des_hva, src_hva, per_size); + k_mem_unmap_phys_bare((uint8_t *)src_hva, per_size); + k_mem_unmap_phys_bare((uint8_t *)des_hva, per_size); + des_hpa += per_size; + src_hpa += per_size; + num_m--; + } + ZVM_LOG_INFO("Linux FS Image Loaded !\n"); + + return ret; +} + +int vm_os_create(struct z_os* os, struct z_os_info *vm_info) +{ + os->info.os_type = vm_info->os_type; + os->name = (char *)k_malloc(sizeof(char)*OS_NAME_LENGTH); + memset(os->name, '\0', OS_NAME_LENGTH); + + switch (os->info.os_type){ + case OS_TYPE_LINUX: + strcpy(os->name, "linux_os"); + os->is_rtos = false; + break; + case OS_TYPE_ZEPHYR: + strcpy(os->name, "zephyr_os"); + os->is_rtos = true; + break; + default: + return -ENXIO; 
+		break;
+	}
+	os->info.vm_mem_base = vm_info->vm_mem_base;
+	os->info.vm_mem_size = vm_info->vm_mem_size;
+	os->info.vm_image_base = vm_info->vm_image_base;
+	os->info.vm_image_size = vm_info->vm_image_size;
+	os->info.vcpu_num = vm_info->vcpu_num;
+	os->info.entry_point = vm_info->entry_point;
+	os->info.vm_load_base = vm_info->vm_load_base;
+	return 0;
+}
diff --git a/subsys/zvm/vdev/CMakeLists.txt b/subsys/zvm/vdev/CMakeLists.txt
new file mode 100644
index 00000000000000..cbb1c75dd33006
--- /dev/null
+++ b/subsys/zvm/vdev/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+# Copyright 2024-2025 openEuler SIG-Zephyr
+# SPDX-License-Identifier: Apache-2.0
+
+zephyr_sources_ifdef(
+	CONFIG_ZVM
+	virt_psci.c
+	vserial.c
+	vpl011.c
+)
+
+zephyr_sources_ifdef(
+	CONFIG_ARM64
+	vgic_common.c
+	vgic_v3.c
+)
diff --git a/subsys/zvm/vdev/Kconfig b/subsys/zvm/vdev/Kconfig
new file mode 100644
index 00000000000000..95958fa49ff63a
--- /dev/null
+++ b/subsys/zvm/vdev/Kconfig
@@ -0,0 +1,55 @@
+# Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+# Copyright 2024-2025 openEuler SIG-Zephyr
+# SPDX-License-Identifier: Apache-2.0
+
+menuconfig ZVM_DEVICE_SUPPORT
+	bool "Zephyr Based Virtualization Device Menu"
+	default y
+	help
+	  Enable support for Zephyr-based virtualization devices.
+
+config VIRT_DEVICE_INTERRUPT_DRIVEN
+	bool "ZVM Virtualization Device Interrupt Support"
+	default y
+	help
+	  Enable interrupt support for ZVM virtualization devices.
+	  This option must be selected to set the callback function
+	  when VM devices require interrupts.
+
+config VM_VGICV3
+	bool "VGICv3 Device Support"
+	help
+	  Enable support for the VGICv3 device, which is used
+	  when setting up a VM.
+
+if VM_VGICV3
+
+config VM_VGICV3_INIT_PRIORITY
+	int "VGICv3 Initialization Priority"
+	default 52
+	help
+	  Set the initialization priority for the VGICv3 device.
+	  This is evaluated at the POST_KERNEL level.
+
+endif
+
+config VM_VSERIAL
+	bool "Virtual serial device for the VM"
+	help
+	  Enable the virtual serial device whose memory region is
+	  taken from the overlay.
+
+if VM_VSERIAL
+
+config VM_VSERIAL_INIT_PRIORITY
+	int "VM virt serial init priority"
+	default 52
+	help
+	  Initialization priority of the virt serial device, evaluated
+	  at the POST_KERNEL level.
+
+config VIRT_SERIAL_CTRL_INIT_PRIORITY
+	int "VM virt serial controller init priority"
+	default 51
+	help
+	  Initialization priority of the virt serial controller, evaluated
+	  at the POST_KERNEL level.
+
+endif
diff --git a/subsys/zvm/vdev/pt_device_qemu_max.c b/subsys/zvm/vdev/pt_device_qemu_max.c
new file mode 100644
index 00000000000000..8c149e01252e9e
--- /dev/null
+++ b/subsys/zvm/vdev/pt_device_qemu_max.c
@@ -0,0 +1,129 @@
+
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+#define DEV_CFG(dev) \
+	((const struct virt_device_config * const) \
+	(dev)->config)
+#define DEV_DATA(dev) \
+	((struct virt_device_data *)(dev)->data)
+
+#define PTDEV_CFG(dev) \
+	((const struct pass_through_device_config * const) \
+	(DEV_CFG(dev)->device_config))
+
+/* Device init function called at system bootup. */
+static int pass_through_device_init(const struct device *dev)
+{
+	/* Record the device init result. */
+	dev->state->init_res |= VM_DEVICE_INIT_RES;
+
+	/* Init the interrupt configuration. */
+	if (PTDEV_CFG(dev)->irq_config_func) {
+		PTDEV_CFG(dev)->irq_config_func(dev);
+	}
+
+	printk("PT-DEVICE: Initialized pass-through device: %s. \n", dev->name);
+	return 0;
+}
+
+static int vm_ptdevice_init(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc)
+{
+	struct z_virt_dev *vdev;
+
+	vdev = allocate_device_to_vm(dev, vm, vdev_desc, true, false);
+	if (!vdev) {
+		printk("Init pass-through device error\n");
+		return -ENODEV;
+	}
+
+	if (DEV_DATA(dev)->device_data) {
+		struct z_virt_dev *vdev_tmp = DEV_DATA(dev)->device_data;
+		printk("Device data is not NULL, please check the device: %s\n", vdev_tmp->name);
+	} else {
+		DEV_DATA(dev)->device_data = vdev;
+	}
+
+	/* Call the device-specific vm init hook, if any. */
+	if (PTDEV_CFG(dev)->ptdev_spec_init_func) {
+		PTDEV_CFG(dev)->ptdev_spec_init_func(vdev);
+	}
+
+	return 0;
+}
+
+static void pass_through_device_isr(const struct device *dev)
+{
+	/* irq handler: dispatch to the vm device callback. */
+	if (DEV_DATA(dev)->device_data) {
+		vm_device_callback_func(dev, NULL, DEV_DATA(dev)->device_data);
+	} else {
+		printk("irq handle error, vdev is NULL, please check the device: %s\n", dev->name);
+	}
+
+	/* Call the device-specific irq routing hook, if any. */
+	if (PTDEV_CFG(dev)->ptdev_spec_irq_func) {
+		PTDEV_CFG(dev)->ptdev_spec_irq_func(dev);
+	}
+}
+
+static const struct virt_device_api virt_ptdevice_api = {
+	.init_fn = vm_ptdevice_init,
+};
+
+
+/*-----------------------------------------------------------------------*/
+/*-----------------------------------------------------------------------*/
+/*-------------------sample for adding a pt device-----------------------*/
+/*
+static void ptdevice_irq_config_func_1(const struct device *dev)
+{
+	IRQ_CONNECT(DT_IRQN(DT_ALIAS(ptdevice1)),
+		DT_IRQ(DT_ALIAS(ptdevice1), priority),
+		pass_through_device_isr,
+		DEVICE_DT_GET(DT_ALIAS(ptdevice1)),
+		0);
+	irq_enable(DT_IRQN(DT_ALIAS(ptdevice1)));
+}
+
+static struct pass_through_device_config ptdevice_cfg_port_1 = {
+	.irq_config_func = ptdevice_irq_config_func_1,
+	.ptdev_spec_init_func = NULL,
+	.ptdev_spec_irq_func = NULL,
+};
+
+static struct virt_device_config virt_ptdevice_cfg_1 = {
+	.reg_base = DT_REG_ADDR(DT_ALIAS(ptdevice1)),
+	.reg_size = DT_REG_SIZE(DT_ALIAS(ptdevice1)),
+	.hirq_num = DT_IRQN(DT_ALIAS(ptdevice1)),
+	.device_config = &ptdevice_cfg_port_1,
+};
+
+static struct virt_device_data virt_ptdevice_data_port_1 = {
+	.device_data = NULL,
+};
+
+DEVICE_DT_DEFINE(DT_ALIAS(ptdevice1),
+	&pass_through_device_init,
+	NULL, &virt_ptdevice_data_port_1, &virt_ptdevice_cfg_1,
+	POST_KERNEL, CONFIG_SERIAL_INIT_PRIORITY,
+	&virt_ptdevice_api);
+*/
+/*------------------------cut line---------------------------------------*/
\ No newline at end of file
diff --git a/subsys/zvm/vdev/vgic_common.c b/subsys/zvm/vdev/vgic_common.c
new file mode 100644
index 00000000000000..04b9b9586d4e6f
--- /dev/null
+++ b/subsys/zvm/vdev/vgic_common.c
@@ -0,0 +1,599 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include <../drivers/interrupt_controller/intc_gicv3_priv.h>
+#include <../kernel/include/ksched.h>
+#include
+#include
+#include
+#include
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+static int virt_irq_set_type(struct z_vcpu *vcpu, uint32_t offset, uint32_t *value)
+{
+	uint8_t lowbit_value;
+	int i, irq, idx_base;
+	uint32_t reg_val;
+	mem_addr_t base;
+	struct virt_irq_desc *desc;
+
+	idx_base = (offset - GICD_ICFGRn) / 4;
+	irq = 16 * idx_base;
+	base = GIC_DIST_BASE;
+
+	/**
+	 * Each register controls 16 interrupt signals,
+	 * two configuration bits per interrupt.
+	 * TODO: this loop could be simplified to save time.
+	 */
+	for (i = 0; i < 16; i++, irq++) {
+		desc = vgic_get_virt_irq_desc(vcpu, irq);
+		if (!desc) {
+			return -ENOENT;
+		}
+		lowbit_value = (*value >> 2*i) & GICD_ICFGR_MASK;
+		if (desc->type != lowbit_value) {
+			desc->type = lowbit_value;
+			/* If it is a hardware device interrupt */
+			if (desc->virq_flags & VIRQ_HW_FLAG) {
+				reg_val = sys_read32(GICD_ICFGRn + (idx_base * 4));
+				reg_val &= ~(GICD_ICFGR_MASK << 2*i);
+				if (lowbit_value) {
+					reg_val |= (GICD_ICFGR_TYPE << 2*i);
+				}
+				/* clear the enabled flag of the interrupt */
+				irq_disable(irq);
+				sys_write32(reg_val, GICD_ICFGRn + (idx_base*4));
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * @brief The type value is read from the descriptor.
+ * TODO: maybe read it directly from the vgic register.
+*/
+static int virt_irq_get_type(struct z_vcpu *vcpu, uint32_t offset, uint32_t *value)
+{
+	int i, irq, idx_base;
+	struct virt_irq_desc *desc;
+
+	idx_base = (offset - GICD_ICFGRn) / 4;
+	irq = 16 * idx_base;
+
+	/* Each register controls 16 interrupt signals. */
+	for (i = 0; i < 16; i++, irq++) {
+		desc = vgic_get_virt_irq_desc(vcpu, irq);
+		if (!desc) {
+			continue;
+		}
+		*value = *value | (desc->type << i * 2);
+	}
+	return 0;
+}
+
+/**
+ * @brief Set the priority of a specific virtual interrupt request.
+ */
+static int vgic_virq_set_priority(struct z_vcpu *vcpu, uint32_t virt_irq, int prio)
+{
+	struct virt_irq_desc *desc;
+
+	desc = vgic_get_virt_irq_desc(vcpu, virt_irq);
+	if (!desc) {
+		return -ENOENT;
+	}
+	desc->prio = prio;
+
+	return 0;
+}
+
+static int vgic_set_virq(struct z_vcpu *vcpu, struct virt_irq_desc *desc)
+{
+	uint8_t lr_state;
+	k_spinlock_key_t key;
+	struct vcpu_virt_irq_block *vb = &vcpu->virq_block;
+
+	if (!is_vm_irq_valid(vcpu->vm, desc->virq_flags)) {
+		ZVM_LOG_WARN("VM can not receive virq signal, VM's name: %s.", vcpu->vm->vm_name);
+		return -ESRCH;
+	}
+
+	key = k_spin_lock(&vb->spinlock);
+	lr_state = desc->virq_states;
+
+	switch (lr_state) {
+	case VIRQ_STATE_INVALID:
+		desc->virq_flags |= VIRQ_PENDING_FLAG;
+		if (!sys_dnode_is_linked(&desc->desc_node)) {
+			sys_dlist_append(&vb->pending_irqs, &desc->desc_node);
+			vb->virq_pending_counts++;
+		}
+		if (desc->virq_num < VM_LOCAL_VIRQ_NR) {
+			/*** bug: the thread may have switched to the host, and vcpu may be NULL. ***/
+		}
+		break;
+	case VIRQ_STATE_ACTIVE:
+		desc->virq_flags |= VIRQ_ACTIVED_FLAG;
+		/* if the vm interrupt is not in the active list */
+		if (!sys_dnode_is_linked(&desc->desc_node)) {
+			sys_dlist_append(&vb->pending_irqs, &desc->desc_node);
+			vb->virq_pending_counts++;
+		}
+		break;
+	case VIRQ_STATE_PENDING:
+		break;
+	case VIRQ_STATE_ACTIVE_AND_PENDING:
+		break;
+	}
+	k_spin_unlock(&vb->spinlock, key);
+	/**
+	 * @Bug: without this judgement, wakeup_target_vcpu would
+	 * wrongly wake a paused vm!
+	 * When the vcpu is not bound to the current cpu, we must inform the
+	 * destination pcpu instead: the vCPU may be running on another pcpu,
+	 * or it may be in an idle state.
+	 */
+	if (vcpu->work->vcpu_thread != _current) {
+		if (is_thread_active_elsewhere(vcpu->work->vcpu_thread)) {
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
+			arch_sched_broadcast_ipi();
+#endif
+		} else {
+			wakeup_target_vcpu(vcpu, desc);
+		}
+	}
+
+	return 0;
+}
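+/*
+ * Illustrative note (not part of the original patch): GICD_ICFGRn packs
+ * two configuration bits per interrupt, so within one 32-bit register
+ * interrupt i occupies bits [2*i+1:2*i]. A hypothetical helper mirroring
+ * the shift/mask arithmetic used by virt_irq_set_type() above:
+ */
+static inline uint8_t vgic_icfgr_field(uint32_t reg_val, int i)
+{
+	/* GICD_ICFGR_MASK is the 2-bit field mask; 16 fields per register. */
+	return (reg_val >> (2 * i)) & GICD_ICFGR_MASK;
+}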
+static int vgic_unset_virq(struct z_vcpu *vcpu, struct virt_irq_desc *desc)
+{
+	uint8_t lr_state;
+	k_spinlock_key_t key;
+	struct vcpu_virt_irq_block *vb = &vcpu->virq_block;
+
+	if (!is_vm_irq_valid(vcpu->vm, desc->virq_flags)) {
+		ZVM_LOG_WARN("VM can not receive virq signal, VM's name: %s.", vcpu->vm->vm_name);
+		return -ESRCH;
+	}
+
+	key = k_spin_lock(&vb->spinlock);
+	lr_state = desc->virq_states;
+
+	desc->virq_flags &= ~VIRQ_PENDING_FLAG;
+	desc->virq_flags &= ~VIRQ_ACTIVED_FLAG;
+
+	if (sys_dnode_is_linked(&desc->desc_node)) {
+		sys_dlist_remove(&desc->desc_node);
+		vb->virq_pending_counts--;
+	}
+
+	k_spin_unlock(&vb->spinlock, key);
+
+	return 0;
+}
+
+/**
+ * @brief Set an SGI interrupt for the vm; usually used for
+ * vcpu-to-vcpu communication.
+*/
+static bool vgic_set_sgi2vcpu(struct z_vcpu *vcpu, struct virt_irq_desc *desc)
+{
+	return true;
+}
+
+static int vgic_gicd_mem_read(struct z_vcpu *vcpu, struct virt_gic_gicd *gicd,
+				uint32_t offset, uint64_t *v)
+{
+	uint32_t *value = (uint32_t *)v;
+
+	offset += GIC_DIST_BASE;
+	switch (offset) {
+	case GICD_CTLR:
+		*value = vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_CTLR) & ~(1 << 31);
+		break;
+	case GICD_TYPER:
+		*value = vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_TYPER);
+		break;
+	case GICD_IIDR:
+		*value = vgic_sysreg_read32(gicd->gicd_regs_base, offset-GIC_DIST_BASE);
+		break;
+	case GICD_STATUSR:
+		*value = 0;
+		break;
+	case GICD_ISENABLERn...(GICD_ICENABLERn - 1):
+		*value = 0;
+		break;
+	case GICD_ICENABLERn...(GICD_ISPENDRn - 1):
+		*value = 0;
+		break;
+	case (GIC_DIST_BASE+VGIC_RESERVED)...(GIC_DIST_BASE+VGIC_INMIRn - 1):
+		*value = vgic_sysreg_read32(gicd->gicd_base, offset-GIC_DIST_BASE);
+		break;
+	case GICD_ICFGRn...(GIC_DIST_BASE + 0x0cfc - 1):
+		virt_irq_get_type(vcpu, offset, value);
+		break;
+	case (GIC_DIST_BASE + VGICD_PIDR2):
+		*value = vgic_sysreg_read32(gicd->gicd_regs_base, VGICD_PIDR2);
+		break;
+	default:
+		*value = 0;
+		break;
+	}
+	return 0;
+}
+
+static int vgic_gicd_mem_write(struct z_vcpu *vcpu, struct virt_gic_gicd *gicd,
+				uint32_t offset, uint64_t *v)
+{
+	uint32_t x, y, bit, t;
+	uint32_t *value = (uint32_t *)v;
+	k_spinlock_key_t key;
+
+	key = k_spin_lock(&gicd->gicd_lock);
+	offset += GIC_DIST_BASE;
+	switch (offset) {
+	case GICD_CTLR:
+		vgic_sysreg_write32(*value, gicd->gicd_regs_base, VGICD_CTLR);
+		break;
+	case GICD_TYPER:
+		break;
+	case GICD_STATUSR:
+		break;
+	case GICD_ISENABLERn...(GICD_ICENABLERn - 1):
+		x = (offset - GICD_ISENABLERn) / 4;
+		y = x * 32;
+		vgic_test_and_set_enable_bit(vcpu, y, value, 32, 1, gicd);
+		break;
+	case GICD_ICENABLERn...(GICD_ISPENDRn - 1):
+		x = (offset - GICD_ICENABLERn) / 4;
+		y = x * 32;
+		vgic_test_and_set_enable_bit(vcpu, y, value, 32, 0, gicd);
+		break;
+	case GICD_ISPENDRn...(GICD_ICPENDRn - 1):
+		/* Set virt irq to vm. */
+		x = (offset - GICD_ISPENDRn) / 4;
+		y = x * 32;
+		vgic_test_and_set_pending_bit(vcpu, y, value, 32, 1, gicd);
+		break;
+	case GICD_ICPENDRn...(GICD_ISACTIVERn - 1):
+		/* Unset virt irq to vm.
*/ + x = (offset - GICD_ICPENDRn) / 4; + y = x * 32; + vgic_test_and_set_pending_bit(vcpu, y, value, 32, 0, gicd); + break; + case GICD_IPRIORITYRn...(GIC_DIST_BASE + 0x07f8 - 1): + t = *value; + x = (offset - GICD_IPRIORITYRn) / 4; + y = x * 4 - 1; + bit = (t & 0x000000ff); + vgic_virq_set_priority(vcpu, y + 1, bit); + bit = (t & 0x0000ff00) >> 8; + vgic_virq_set_priority(vcpu, y + 2, bit); + bit = (t & 0x00ff0000) >> 16; + vgic_virq_set_priority(vcpu, y + 3, bit); + bit = (t & 0xff000000) >> 24; + vgic_virq_set_priority(vcpu, y + 4, bit); + break; + case GICD_ICFGRn...(GIC_DIST_BASE + 0x0cfc - 1): + virt_irq_set_type(vcpu, offset, value); + break; + case (GIC_DIST_BASE+VGIC_RESERVED)...(GIC_DIST_BASE+VGIC_INMIRn - 1): + vgic_sysreg_write32(*value, gicd->gicd_base, offset-GIC_DIST_BASE); + break; + default: + break; + } + k_spin_unlock(&gicd->gicd_lock, key); + + return 0; +} + +void arch_vdev_irq_enable(struct z_vcpu *vcpu) +{ + uint32_t irq; + struct z_vm *vm = vcpu->vm; + struct z_virt_dev *vdev; + struct _dnode *d_node, *ds_node; + + SYS_DLIST_FOR_EACH_NODE_SAFE(&vm->vdev_list, d_node, ds_node) { + vdev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node); + if(vdev->dev_pt_flag && vcpu->vcpu_id == 0) { + /* enable spi interrupt */ + irq = vdev->hirq; + if (irq > CONFIG_NUM_IRQS) { + continue; + } + arm_gic_irq_enable(irq); + } + } +} + +void arch_vdev_irq_disable(struct z_vcpu *vcpu) +{ + uint32_t irq; + struct z_vm *vm = vcpu->vm; + struct z_virt_dev *vdev; + struct _dnode *d_node, *ds_node; + + SYS_DLIST_FOR_EACH_NODE_SAFE(&vm->vdev_list, d_node, ds_node) { + vdev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node); + if(vdev->dev_pt_flag && vcpu->vcpu_id == 0) { + /* disable spi interrupt */ + irq = vdev->hirq; + if(irq > CONFIG_NUM_IRQS){ + continue; + } + arm_gic_irq_disable(irq); + } + } +} + +int vgic_vdev_mem_read(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size) +{ + uint32_t offset, type = TYPE_GIC_INVAILD; + struct z_vcpu *vcpu = _current_vcpu; + struct vgicv3_dev *vgic = (struct vgicv3_dev *)vdev->priv_vdev; + struct virt_gic_gicd *gicd = &vgic->gicd; + struct virt_gic_gicr *gicr; + + /*Avoid some case that we only just use '|' to get the value */ + *value = 0; + + if ((addr >= gicd->gicd_base) && (addr < gicd->gicd_base + gicd->gicd_size)) { + type = TYPE_GIC_GICD; + offset = addr - gicd->gicd_base; + } else { + gicr = get_vcpu_gicr_type(vgic, addr, &type, &offset); + } + + switch (type) { + case TYPE_GIC_GICD: + return vgic_gicd_mem_read(vcpu, gicd, offset, value); + case TYPE_GIC_GICR_RD: + return vgic_gicrrd_mem_read(vcpu, gicr, offset, value); + case TYPE_GIC_GICR_SGI: + return vgic_gicrsgi_mem_read(vcpu, gicr, offset, value); + case TYPE_GIC_GICR_VLPI: + /* ignore vlpi register */ + return 0; + default: + return 0; + } + + return 0; +} + +int vgic_vdev_mem_write(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size) +{ + uint32_t offset; + int type = TYPE_GIC_INVAILD; + struct z_vcpu *vcpu = _current_vcpu; + struct vgicv3_dev *vgic = (struct vgicv3_dev *)vdev->priv_vdev; + struct virt_gic_gicd *gicd = &vgic->gicd; + struct virt_gic_gicr *gicr; + + if ((addr >= gicd->gicd_base) && (addr < gicd->gicd_base + gicd->gicd_size)) { + type = TYPE_GIC_GICD; + offset = addr - gicd->gicd_base; + } else { + gicr = get_vcpu_gicr_type(vgic, addr, &type, &offset); + } + + switch (type) { + case TYPE_GIC_GICD: + return vgic_gicd_mem_write(vcpu, gicd, offset, value); + case TYPE_GIC_GICR_RD: + return vgic_gicrrd_mem_write(vcpu, gicr, offset, 
value);
+	case TYPE_GIC_GICR_SGI:
+		return vgic_gicrsgi_mem_write(vcpu, gicr, offset, value);
+	case TYPE_GIC_GICR_VLPI:
+		return 0;
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+int set_virq_to_vcpu(struct z_vcpu *vcpu, uint32_t virq_num)
+{
+	struct virt_irq_desc *desc;
+
+	desc = vgic_get_virt_irq_desc(vcpu, virq_num);
+	if (!desc) {
+		ZVM_LOG_WARN("Failed to get the virt irq desc!");
+		return -ESRCH;
+	}
+
+	return vgic_set_virq(vcpu, desc);
+}
+
+int set_virq_to_vm(struct z_vm *vm, uint32_t virq_num)
+{
+	int ret = 0;
+	struct virt_irq_desc *desc;
+	struct z_vcpu *vcpu, *target_vcpu;
+	vcpu = vm->vcpus[DEFAULT_VCPU];
+
+	if (virq_num < VM_LOCAL_VIRQ_NR) {
+		desc = &vcpu->virq_block.vcpu_virt_irq_desc[virq_num];
+	} else if (virq_num < VM_GLOBAL_VIRQ_NR) {
+		desc = &vm->vm_irq_block.vm_virt_irq_desc[virq_num - VM_LOCAL_VIRQ_NR];
+	} else {
+		ZVM_LOG_WARN("The virq number to be allocated is out of range.");
+		return -ENODEV;
+	}
+
+	target_vcpu = vm->vcpus[desc->vcpu_id];
+	ret = vgic_set_virq(target_vcpu, desc);
+	if (ret >= 0) {
+		return SET_IRQ_TO_VM_SUCCESS;
+	}
+
+	return ret;
+}
+
+int unset_virq_to_vm(struct z_vm *vm, uint32_t virq_num)
+{
+	int ret = 0;
+	struct virt_irq_desc *desc;
+	struct z_vcpu *vcpu, *target_vcpu;
+	vcpu = vm->vcpus[DEFAULT_VCPU];
+
+	if (virq_num < VM_LOCAL_VIRQ_NR) {
+		desc = &vcpu->virq_block.vcpu_virt_irq_desc[virq_num];
+	} else if (virq_num < VM_GLOBAL_VIRQ_NR) {
+		desc = &vm->vm_irq_block.vm_virt_irq_desc[virq_num - VM_LOCAL_VIRQ_NR];
+	} else {
+		ZVM_LOG_WARN("The virq number to be allocated is out of range.");
+		return -ENODEV;
+	}
+
+	target_vcpu = vm->vcpus[desc->vcpu_id];
+	ret = vgic_unset_virq(target_vcpu, desc);
+	if (ret >= 0) {
+		return UNSET_IRQ_TO_VM_SUCCESS;
+	}
+
+	return ret;
+}
+
+int virt_irq_sync_vgic(struct z_vcpu *vcpu)
+{
+	uint8_t lr_state;
+	uint64_t elrsr, eisr;
+	k_spinlock_key_t key;
+	struct virt_irq_desc *desc;
+	struct _dnode *d_node, *ds_node;
+	struct vcpu_virt_irq_block *vb = &vcpu->virq_block;
+
+	key = k_spin_lock(&vb->spinlock);
+	if (vb->virq_pending_counts == 0) {
+		k_spin_unlock(&vb->spinlock, key);
+		return 0;
+	}
+
+	/* Get the maintained or valid irqs */
+	elrsr = read_elrsr_el2();
+	eisr = read_eisr_el2();
+	elrsr |= eisr;
+	elrsr &= vcpu->arch->list_regs_map;
+
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vb->active_irqs, d_node, ds_node) {
+		desc = CONTAINER_OF(d_node, struct virt_irq_desc, desc_node);
+		/* Skip list registers that are still live in hardware. */
+		if (!VGIC_ELRSR_REG_TEST(desc->id, elrsr)) {
+			continue;
+		}
+
+		lr_state = gicv3_get_lr_state(vcpu, desc);
+		switch (lr_state) {
+		/* the vm interrupt is done, or needs pending again while active */
+		case VIRQ_STATE_ACTIVE:
+			/* if this sync was not caused by an irq trap */
+			if (vcpu->exit_type != ARM_VM_EXCEPTION_IRQ) {
+				desc->virq_states = lr_state;
+				break;
+			}
+			/* fall through */
+		case VIRQ_STATE_INVALID:
+			gicv3_update_lr(vcpu, desc, ACTION_CLEAR_VIRQ, 0);
+			vcpu->arch->hcr_el2 &= ~(uint64_t)HCR_VI_BIT;
+			sys_dlist_remove(&desc->desc_node);
+			/* if the software irq is still triggered */
+			if (desc->vdev_trigger) {
+				/* The vm interrupt is done, but the host interrupt is pending */
+				sys_dlist_append(&vb->pending_irqs, &desc->desc_node);
+			}
+			vb->virq_pending_counts--;
+			/* the vm interrupt is still pending, no need to inject it again */
+			/* fall through */
+			 */
+		case VIRQ_STATE_PENDING:
+		case VIRQ_STATE_ACTIVE_AND_PENDING:
+			desc->virq_states = lr_state;
+			break;
+		}
+	}
+	k_spin_unlock(&vb->spinlock, key);
+
+	return 0;
+}
+
+int virt_irq_flush_vgic(struct z_vcpu *vcpu)
+{
+	int ret;
+	k_spinlock_key_t key;
+	struct virt_irq_desc *desc;
+	struct _dnode *d_node, *ds_node;
+	struct vcpu_virt_irq_block *vb = &vcpu->virq_block;
+
+	key = k_spin_lock(&vb->spinlock);
+	if (vb->virq_pending_counts == 0) {
+		/* No pending irq, just return. */
+		k_spin_unlock(&vb->spinlock, key);
+		return 0;
+	}
+
+	/* no idle list register */
+	if (vcpu->arch->list_regs_map == ((1 << VGIC_TYPER_LR_NUM) - 1)) {
+		k_spin_unlock(&vb->spinlock, key);
+		ZVM_LOG_WARN("There is no idle list register!");
+		return 0;
+	}
+
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vb->pending_irqs, d_node, ds_node) {
+		desc = CONTAINER_OF(d_node, struct virt_irq_desc, desc_node);
+
+		/* If the vm interrupt is not yet deactivated, avoid injecting it again. */
+		if (!(desc->virq_states == VIRQ_STATE_INVALID || desc->virq_states == VIRQ_STATE_ACTIVE)) {
+			continue;
+		}
+
+		if (desc->virq_flags & VIRQ_PENDING_FLAG || desc->virq_flags & VIRQ_ACTIVED_FLAG) {
+			switch (VGIC_VIRQ_LEVEL_SORT(desc->virq_num)) {
+			case VGIC_VIRQ_IN_SGI:
+				vgic_set_sgi2vcpu(vcpu, desc);
+				/* fall through */
+			case VGIC_VIRQ_IN_PPI:
+			default:
+				break;
+			}
+			desc->id = gicv3_get_idle_lr(vcpu);
+			if (desc->id < 0) {
+				ZVM_LOG_WARN("No idle list register for virq: %d. \n", desc->virq_num);
+				break;
+			}
+			ret = gicv3_inject_virq(vcpu, desc);
+			if (ret) {
+				k_spin_unlock(&vb->spinlock, key);
+				return ret;
+			}
+			desc->virq_states = VIRQ_STATE_PENDING;
+			desc->virq_flags &= (uint32_t)~VIRQ_PENDING_FLAG;
+			sys_dlist_remove(&desc->desc_node);
+			sys_dlist_append(&vb->active_irqs, &desc->desc_node);
+		} else {
+			ZVM_LOG_WARN("Something is wrong: virq-id %d is in the list but not pending. \n", desc->id);
+			gicv3_update_lr(vcpu, desc, ACTION_CLEAR_VIRQ, 0);
+			desc->id = VM_INVALID_DESC_ID;
+			sys_dlist_remove(&desc->desc_node);
+		}
+	}
+	k_spin_unlock(&vb->spinlock, key);
+
+	return 0;
+}
+
+struct virt_irq_desc *get_virt_irq_desc(struct z_vcpu *vcpu, uint32_t virq)
+{
+	return vgic_get_virt_irq_desc(vcpu, virq);
+}
diff --git a/subsys/zvm/vdev/vgic_v3.c b/subsys/zvm/vdev/vgic_v3.c
new file mode 100644
index 00000000000000..5a39a90b46126a
--- /dev/null
+++ b/subsys/zvm/vdev/vgic_v3.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+#define VM_GIC_NAME vm_gic_v3
+
+#define DEV_DATA(dev) \
+	((struct virt_device_data *)(dev)->data)
+
+#define DEV_CFG(dev) \
+	((const struct virt_device_config * const)(dev)->config)
+
+static const struct virtual_device_instance *gic_virtual_device_instance;
+
+static void vgicv3_lrs_load(struct gicv3_vcpuif_ctxt *ctxt)
+{
+	uint32_t rg_cout = VGIC_TYPER_LR_NUM;
+
+	if (rg_cout > VGIC_TYPER_LR_NUM) {
+		ZVM_LOG_WARN("System list registers do not support! 
\n"); + return; + } + + switch (rg_cout) { + case 8: + write_sysreg(ctxt->ich_lr7_el2, ICH_LR7_EL2); + case 7: + write_sysreg(ctxt->ich_lr6_el2, ICH_LR6_EL2); + case 6: + write_sysreg(ctxt->ich_lr5_el2, ICH_LR5_EL2); + case 5: + write_sysreg(ctxt->ich_lr4_el2, ICH_LR4_EL2); + case 4: + write_sysreg(ctxt->ich_lr3_el2, ICH_LR3_EL2); + case 3: + write_sysreg(ctxt->ich_lr2_el2, ICH_LR2_EL2); + case 2: + write_sysreg(ctxt->ich_lr1_el2, ICH_LR1_EL2); + case 1: + write_sysreg(ctxt->ich_lr0_el2, ICH_LR0_EL2); + break; + default: + break; + } +} + +static void vgicv3_prios_load(struct gicv3_vcpuif_ctxt *ctxt) +{ + uint32_t rg_cout = VGIC_TYPER_PRIO_NUM; + + switch (rg_cout) { + case 7: + write_sysreg(ctxt->ich_ap0r2_el2, ICH_AP0R2_EL2); + write_sysreg(ctxt->ich_ap1r2_el2, ICH_AP1R2_EL2); + case 6: + write_sysreg(ctxt->ich_ap0r1_el2, ICH_AP0R1_EL2); + write_sysreg(ctxt->ich_ap1r1_el2, ICH_AP1R1_EL2); + case 5: + write_sysreg(ctxt->ich_ap0r0_el2, ICH_AP0R0_EL2); + write_sysreg(ctxt->ich_ap1r0_el2, ICH_AP1R0_EL2); + break; + default: + ZVM_LOG_ERR("Load prs error"); + } +} + +static void vgicv3_ctrls_load(struct gicv3_vcpuif_ctxt *ctxt) +{ + write_sysreg(ctxt->icc_sre_el1, ICC_SRE_EL1); + write_sysreg(ctxt->ich_vmcr_el2, ICH_VMCR_EL2); + write_sysreg(ctxt->ich_hcr_el2, ICH_HCR_EL2); +} + +static void vgicv3_lrs_save(struct gicv3_vcpuif_ctxt *ctxt) +{ + uint32_t rg_cout = VGIC_TYPER_LR_NUM; + + if (rg_cout > VGIC_TYPER_LR_NUM) { + ZVM_LOG_WARN("System list registers do not support! \n"); + return; + } + + switch (rg_cout) { + case 8: + ctxt->ich_lr7_el2 = read_sysreg(ICH_LR7_EL2); + case 7: + ctxt->ich_lr6_el2 = read_sysreg(ICH_LR6_EL2); + case 6: + ctxt->ich_lr5_el2 = read_sysreg(ICH_LR5_EL2); + case 5: + ctxt->ich_lr4_el2 = read_sysreg(ICH_LR4_EL2); + case 4: + ctxt->ich_lr3_el2 = read_sysreg(ICH_LR3_EL2); + case 3: + ctxt->ich_lr2_el2 = read_sysreg(ICH_LR2_EL2); + case 2: + ctxt->ich_lr1_el2 = read_sysreg(ICH_LR1_EL2); + case 1: + ctxt->ich_lr0_el2 = read_sysreg(ICH_LR0_EL2); + break; + default: + break; + } +} + +static void vgicv3_lrs_init(void) +{ + uint32_t rg_cout = VGIC_TYPER_LR_NUM; + + if (rg_cout > VGIC_TYPER_LR_NUM) { + ZVM_LOG_WARN("System list registers do not support! \n"); + return; + } + + rg_cout = rg_cout>8 ? 8 : rg_cout; + + switch (rg_cout) { + case 8: + write_sysreg(0, ICH_LR7_EL2); + case 7: + write_sysreg(0, ICH_LR6_EL2); + case 6: + write_sysreg(0, ICH_LR5_EL2); + case 5: + write_sysreg(0, ICH_LR4_EL2); + case 4: + write_sysreg(0, ICH_LR3_EL2); + case 3: + write_sysreg(0, ICH_LR2_EL2); + case 2: + write_sysreg(0, ICH_LR1_EL2); + case 1: + write_sysreg(0, ICH_LR0_EL2); + break; + default: + break; + } +} + +static void vgicv3_prios_save(struct gicv3_vcpuif_ctxt *ctxt) +{ + uint32_t rg_cout = VGIC_TYPER_PRIO_NUM; + + switch (rg_cout) { + case 7: + ctxt->ich_ap0r2_el2 = read_sysreg(ICH_AP0R2_EL2); + ctxt->ich_ap1r2_el2 = read_sysreg(ICH_AP1R2_EL2); + case 6: + ctxt->ich_ap0r1_el2 = read_sysreg(ICH_AP0R1_EL2); + ctxt->ich_ap1r1_el2 = read_sysreg(ICH_AP1R1_EL2); + case 5: + ctxt->ich_ap0r0_el2 = read_sysreg(ICH_AP0R0_EL2); + ctxt->ich_ap1r0_el2 = read_sysreg(ICH_AP1R0_EL2); + break; + default: + ZVM_LOG_ERR(" Set ich_ap priority failed. 
\n"); + } +} + +static void vgicv3_ctrls_save(struct gicv3_vcpuif_ctxt *ctxt) +{ + ctxt->icc_sre_el1 = read_sysreg(ICC_SRE_EL1); + ctxt->ich_vmcr_el2 = read_sysreg(ICH_VMCR_EL2); + ctxt->ich_hcr_el2 = read_sysreg(ICH_HCR_EL2); +} + +static int vdev_gicv3_init(struct z_vm *vm, struct vgicv3_dev *gicv3_vdev, uint32_t gicd_base, uint32_t gicd_size, + uint32_t gicr_base, uint32_t gicr_size) +{ + int i = 0; + uint32_t spi_num; + uint64_t tmp_typer = 0; + struct virt_gic_gicd *gicd = &gicv3_vdev->gicd; + struct virt_gic_gicr *gicr; + + gicd->gicd_base = gicd_base; + gicd->gicd_size = gicd_size; + gicd->gicd_regs_base = (uint32_t *)k_malloc(gicd->gicd_size); + if(!gicd->gicd_regs_base){ + return -ENXIO; + } + memset(gicd->gicd_regs_base, 0, gicd_size); + /* GICD PIDR2 */ + vgic_sysreg_write32(0x3<<4, gicd->gicd_regs_base, VGICD_PIDR2); + spi_num = ((VM_GLOBAL_VIRQ_NR + 32) >> 5) - 1; + /* GICD TYPER */ + tmp_typer = (vm->vcpu_num << 5) | (9 << 19) | spi_num; + vgic_sysreg_write32(tmp_typer, gicd->gicd_regs_base, VGICD_TYPER); + /* Init spinlock */ + ZVM_SPINLOCK_INIT(&gicd->gicd_lock); + + for (i = 0; i < MIN(VGIC_RDIST_SIZE/VGIC_RD_SGI_SIZE, vm->vcpu_num); i++) { + gicr = (struct virt_gic_gicr *)k_malloc(sizeof(struct virt_gic_gicr)); + if(!gicr){ + return -ENXIO; + } + /* store the vcpu id for gicr */ + gicr->vcpu_id = i; + + /* init redistribute size */ + gicr->gicr_rd_size = VGIC_RD_BASE_SIZE; + gicr->gicr_rd_reg_base = (uint32_t *)k_malloc(gicr->gicr_rd_size); + if(!gicr->gicr_rd_reg_base) { + ZVM_LOG_ERR("Allocat memory for gicr_rd error! \n"); + return -ENXIO; + } + memset(gicr->gicr_rd_reg_base, 0, gicr->gicr_rd_size); + + /* init sgi redistribute size */ + gicr->gicr_sgi_size = VGIC_SGI_BASE_SIZE; + gicr->gicr_sgi_reg_base = (uint32_t *)k_malloc(gicr->gicr_sgi_size); + if(!gicr->gicr_sgi_reg_base) { + ZVM_LOG_ERR("Allocat memory for gicr_sgi error! \n"); + return -ENXIO; + } + memset(gicr->gicr_sgi_reg_base, 0, gicr->gicr_sgi_size); + + gicr->gicr_rd_base = gicr_base + VGIC_RD_SGI_SIZE * i; + gicr->gicr_sgi_base = gicr->gicr_rd_base + VGIC_RD_BASE_SIZE; + vgic_sysreg_write32(0x3<<4, gicr->gicr_rd_reg_base, VGICR_PIDR2); + ZVM_SPINLOCK_INIT(&gicr->gicr_lock); + + /* GICR TYPER */ + tmp_typer = 1 << GICR_TYPER_LPI_AFFINITY_SHIFT | i << GICR_TYPER_PROCESSOR_NUMBER_SHIFT | ((uint64_t)i << GICR_TYPER_AFFINITY_VALUE_SHIFT); + if(i >= vm->vcpu_num - 1) { + /* set last gicr region flag here, means it is the last gicr region */ + tmp_typer |= 1 << GICR_TYPER_LAST_SHIFT; + } + vgic_sysreg_write64(tmp_typer, gicr->gicr_rd_reg_base, VGICR_TYPER); + vgic_sysreg_write64(tmp_typer, gicr->gicr_sgi_reg_base, VGICR_TYPER); + + gicv3_vdev->gicr[i] = gicr; + } + + ZVM_LOG_INFO("** List register num: %lld \n", VGIC_TYPER_LR_NUM); + vgicv3_lrs_init(); + + return 0; +} + +static int vdev_gicv3_deinit(struct z_vm *vm, struct vgicv3_dev *gicv3_vdev) +{ + ARG_UNUSED(vm); + int i = 0; + struct virt_gic_gicd *gicd = &gicv3_vdev->gicd; + struct virt_gic_gicr *gicr; + + for (i = 0; i < MIN(VGIC_RDIST_SIZE/VGIC_RD_SGI_SIZE, vm->vcpu_num); i++) { + gicr = gicv3_vdev->gicr[i]; + k_free(gicr->gicr_rd_reg_base); + k_free(gicr->gicr_sgi_reg_base); + k_free(gicr); + } + k_free(gicd->gicd_regs_base); + + return 0; +} + +/** + * @brief init vm gic device for each vm. Including: + * 1. creating virt device for vm. + * 2. building memory map for this device. 
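+ * Guest accesses to the mapped GICD/GICR ranges are not backed by real
+ * hardware; they trap and are dispatched to vgic_vdev_mem_read/write.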
+*/ +static int vm_vgicv3_init(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc) +{ + ARG_UNUSED(dev); + ARG_UNUSED(vdev_desc); + int ret; + uint32_t gicd_base, gicd_size, gicr_base, gicr_size; + struct z_virt_dev *virt_dev; + struct vgicv3_dev *vgicv3; + + gicd_base = VGIC_DIST_BASE; + gicd_size = VGIC_DIST_SIZE; + gicr_base = VGIC_RDIST_BASE; + gicr_size = VGIC_RDIST_SIZE; + /* check gic device */ + if(!gicd_base || !gicd_size || !gicr_base || !gicr_size){ + ZVM_LOG_ERR("GIC device has init error!"); + return -ENODEV; + } + + /* Init virtual device for vm. */ + virt_dev = vm_virt_dev_add(vm, gic_virtual_device_instance->name, false, false, gicd_base, + gicd_base, gicr_base+gicr_size-gicd_base, 0, 0); + if(!virt_dev){ + return -ENODEV; + } + + /* Init virtual gic device for virtual device. */ + vgicv3 = (struct vgicv3_dev *)k_malloc(sizeof(struct vgicv3_dev)); + if (!vgicv3) { + ZVM_LOG_ERR("Allocat memory for vgicv3 error \n"); + return -ENODEV; + } + ret = vdev_gicv3_init(vm, vgicv3, gicd_base, gicd_size, gicr_base, gicr_size); + if(ret){ + ZVM_LOG_ERR("Init virt gicv3 error \n"); + return -ENODEV; + } + + /* get the private data for vgicv3 */ + virt_dev->priv_data = gic_virtual_device_instance; + virt_dev->priv_vdev = vgicv3; + + return 0; +} + +static int vm_vgicv3_deinit(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc) +{ + ARG_UNUSED(dev); + int ret; + struct vgicv3_dev *vgicv3; + + vgicv3 = (struct vgicv3_dev *)vdev_desc->priv_vdev; + if(!vgicv3){ + ZVM_LOG_WARN("Can not find virt gicv3 device! \n"); + return 0; + } + ret = vdev_gicv3_deinit(vm, vgicv3); + if(ret){ + ZVM_LOG_WARN("Deinit virt gicv3 error \n"); + return 0; + } + k_free(vgicv3); + + vdev_desc->priv_vdev = NULL; + vdev_desc->priv_data = NULL; + ret = vm_virt_dev_remove(vm, vdev_desc); + return ret; +} + +/** + * @brief The init function of vgic, it provides the + * gic hardware device information to ZVM. +*/ +static int virt_gic_v3_init(void) +{ + int i; + + for (i = 0; i < zvm_virtual_devices_count_get(); i++) { + const struct virtual_device_instance *virtual_device = zvm_virtual_device_get(i); + if(strcmp(virtual_device->name, TOSTRING(VM_GIC_NAME))){ + continue; + } + DEV_DATA(virtual_device)->vdevice_type |= VM_DEVICE_PRE_KERNEL_1; + gic_virtual_device_instance = virtual_device; + break; + } + return 0; +} + +static struct virt_device_config virt_gicv3_cfg = { + .hirq_num = VM_DEVICE_INVALID_VIRQ, + .device_config = NULL, +}; + +static struct virt_device_data virt_gicv3_data_port = { + .device_data = NULL, +}; + +/** + * @brief vgic device operations api. 
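+ * The read/write hooks below are invoked on trapped guest MMIO to the
+ * emulated distributor and redistributor regions.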
+*/ +static const struct virt_device_api virt_gicv3_api = { + .init_fn = vm_vgicv3_init, + .deinit_fn = vm_vgicv3_deinit, + .virt_device_read = vgic_vdev_mem_read, + .virt_device_write = vgic_vdev_mem_write, +}; + +ZVM_VIRTUAL_DEVICE_DEFINE(virt_gic_v3_init, + POST_KERNEL, CONFIG_VM_VGICV3_INIT_PRIORITY, + VM_GIC_NAME, + virt_gicv3_data_port, + virt_gicv3_cfg, + virt_gicv3_api); + +/*******************vgicv3 function****************************/ + +bool virt_irq_ispending(struct z_vcpu *vcpu) +{ + uint32_t *mem_addr_base = NULL; + uint32_t pend_addrend; + struct z_vm *vm; + struct z_virt_dev *vdev; + struct _dnode *d_node, *ds_node; + + vm = vcpu->vm; + SYS_DLIST_FOR_EACH_NODE_SAFE(&vm->vdev_list, d_node, ds_node){ + vdev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node); + if (!strcmp(vdev->name, TOSTRING(VM_GIC_NAME))) { + mem_addr_base = arm_gic_get_distbase(vdev); + break; + } + } + + if(mem_addr_base == NULL){ + ZVM_LOG_ERR("Can not find gic controller! \n"); + return false; + } + mem_addr_base += VGICD_ISPENDRn; + pend_addrend = (uint64_t)mem_addr_base+(VGICD_ICPENDRn-VGICD_ISPENDRn); + for(; (uint64_t)mem_addr_base < pend_addrend; mem_addr_base++){ + if(vgic_irq_test_bit(vcpu, 0, mem_addr_base, 32, 0)){ + return true; + } + } + return false; +} + +uint32_t *arm_gic_get_distbase(struct z_virt_dev *vdev) +{ + struct vgicv3_dev *vgic = (struct vgicv3_dev *)vdev->priv_vdev; + struct virt_gic_gicd *gicd = &vgic->gicd; + + return gicd->gicd_regs_base; +} + +int gicv3_inject_virq(struct z_vcpu *vcpu, struct virt_irq_desc *desc) +{ + uint64_t value = 0; + struct gicv3_list_reg *lr = (struct gicv3_list_reg *)&value; + + if (desc->id >= VGIC_TYPER_LR_NUM) { + ZVM_LOG_WARN("invalid virq id %d, It is used by other device! \n", desc->id); + return -EINVAL; + } + + /* List register is not activated. */ + if (VGIC_LIST_REGS_TEST(desc->id, vcpu)) { + value = gicv3_read_lr(desc->id); + lr = (struct gicv3_list_reg *)&value; + if (lr->vINTID == desc->virq_num) { + desc->virq_flags |= VIRQ_PENDING_FLAG; + } + } + + lr->vINTID = desc->virq_num; + lr->pINTID = desc->pirq_num; + lr->priority = desc->prio; + lr->group = LIST_REG_GROUP1; + lr->hw = LIST_REG_HW_VIRQ; + lr->state = VIRQ_STATE_PENDING; + gicv3_update_lr(vcpu, desc, ACTION_SET_VIRQ, value); + return 0; +} + +int vgicv3_raise_sgi(struct z_vcpu *vcpu, unsigned long sgi_value) +{ + int i, bit, sgi_num=0; + uint32_t sgi_id, sgi_mode; + uint32_t target_list, aff1, aff2, aff3, tmp_id; + uint32_t target_vcpu_list = 0; + struct z_vcpu *target; + struct z_vm *vm = vcpu->vm; + k_spinlock_key_t key; + + sgi_id = (sgi_value & (0xf << 24)) >> 24; + __ASSERT_NO_MSG(GIC_IS_SGI(sgi_id)); + + sgi_mode = sgi_value & (1UL << 40) ? 
SGI_SIG_TO_OTHERS : SGI_SIG_TO_LIST; + if (sgi_mode == SGI_SIG_TO_OTHERS) { + for (i = 0; i < vm->vcpu_num; i++) { + target = vm->vcpus[i]; + if (target == vcpu) { + continue; + } + target->virq_block.pending_sgi_num = sgi_id; + key = k_spin_lock(&target->vcpu_lock); + target->vcpuipi_count ++; + k_spin_unlock(&target->vcpu_lock, key); + } + arch_sched_broadcast_ipi(); + } else if (sgi_mode == SGI_SIG_TO_LIST) { + target_list = sgi_value & 0xffff; + aff1 = (sgi_value & (uint64_t)(0xffUL << 16)) >> 16; + aff2 = (sgi_value & (uint64_t)(0xffUL << 32)) >> 32; + aff3 = (sgi_value & (uint64_t)(0xffUL << 48)) >> 48; + for (bit = 0; bit < 16; bit++) { + if (sys_test_bit((uintptr_t)&target_list, bit)) { + /*Each cluster has CONFIG_MP_NUM_CPUS*/ + tmp_id = aff1 * CONFIG_MP_NUM_CPUS + bit; + sys_set_bits((uintptr_t)&target_vcpu_list, BIT(tmp_id)); + /*TODO: May need modified to vm->vcpu_num. */ + if(++sgi_num > CONFIG_MAX_VCPU_PER_VM || tmp_id >= CONFIG_MAX_VCPU_PER_VM) { + ZVM_LOG_WARN("The target cpu list is too long."); + return -ESRCH; + } + target = vm->vcpus[tmp_id]; + target->virq_block.pending_sgi_num = sgi_id; + key = k_spin_lock(&target->vcpu_lock); + target->vcpuipi_count ++; + k_spin_unlock(&target->vcpu_lock, key); + } + } + if(target_vcpu_list & BIT(tmp_id)) { + set_virq_to_vm(vcpu->vm, sgi_id); + /* Set vcpu flag include itself */ + if(target_vcpu_list & ~BIT(tmp_id)) { + arch_sched_broadcast_ipi(); + } + } else { + arch_sched_broadcast_ipi(); + } + } else { + ZVM_LOG_WARN("Unsupported sgi signal."); + return -ESRCH; + } + return 0; +} + +int vgic_gicrsgi_mem_read(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr, + uint32_t offset, uint64_t *v) +{ + uint32_t *value = (uint32_t *)v; + + switch (offset) { + case GICR_SGI_CTLR: + *value = vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_CTLR) & ~(1 << 31); + break; + case GICR_SGI_ISENABLER: + *value = vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_ISENABLER0); + break; + case GICR_SGI_ICENABLER: + *value = vgic_sysreg_read32(gicr->gicr_sgi_reg_base, VGICR_ICENABLER0); + break; + case GICR_SGI_PENDING: + vgic_sysreg_write32(*value, gicr->gicr_sgi_reg_base, VGICR_SGI_PENDING); + break; + case GICR_SGI_PIDR2: + *value = (0x03 << 4); + break; + default: + *value = 0; + break; + } + + return 0; +} + +int vgic_gicrsgi_mem_write(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr, uint32_t offset, uint64_t *v) +{ + uint32_t *value = (uint32_t *)v; + uint32_t mem_addr = (uint64_t)v; + int bit; + + switch (offset) { + case GICR_SGI_ISENABLER: + vgic_test_and_set_enable_bit(vcpu, 0, value, 32, 1, gicr); + break; + case GICR_SGI_ICENABLER: + vgic_test_and_set_enable_bit(vcpu, 0, value, 32, 0, gicr); + break; + case GICR_SGI_PENDING: + /* clear pending state */ + for(bit = 0; bit < 32; bit++) { + if (sys_test_bit(mem_addr, bit)) { + sys_write32(BIT(bit), GIC_RDIST_BASE + GICR_SGI_BASE_OFF + GICR_SGI_PENDING); + vgic_sysreg_write32(~BIT(bit), gicr->gicr_sgi_reg_base, VGICR_SGI_PENDING); + } + } + break; + default: + *value = 0; + break; + } + + return 0; +} + +int vgic_gicrrd_mem_read(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr, uint32_t offset, uint64_t *v) +{ + uint64_t *value = v; + + /* consider multiple cpu later, Now just return 0 */ + switch (offset) { + case 0xffe8: + *value = vgic_sysreg_read32(gicr->gicr_rd_reg_base, VGICR_PIDR2); + break; + case GICR_CTLR: + vgic_sysreg_write32(*value, gicr->gicr_rd_reg_base, VGICR_CTLR); + break; + case GICR_TYPER: + *value = vgic_sysreg_read64(gicr->gicr_rd_reg_base, VGICR_TYPER); + break; + default: 
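+		/* GICR SGI registers that are not emulated read as zero. */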
+ *value = 0; + break; + } + + return 0; +} + +int vgic_gicrrd_mem_write(struct z_vcpu *vcpu, struct virt_gic_gicr *gicr, uint32_t offset, uint64_t *v) +{ + return 0; +} + +struct virt_gic_gicr* get_vcpu_gicr_type(struct vgicv3_dev *vgic, uint32_t addr, + uint32_t *type, uint32_t *offset) +{ + int i; + struct virt_gic_gicr *gicr; + struct z_vm *vm = get_current_vm(); + + for(i = 0; i < MIN(VGIC_RDIST_SIZE/VGIC_RD_SGI_SIZE, vm->vcpu_num); i++) { + gicr = vgic->gicr[i]; + if ((addr >= gicr->gicr_sgi_base) && addr < gicr->gicr_sgi_base + gicr->gicr_sgi_size) { + *offset = addr - gicr->gicr_sgi_base; + *type = TYPE_GIC_GICR_SGI; + return vgic->gicr[i]; + } + + if (addr >= gicr->gicr_rd_base && addr < (gicr->gicr_rd_base + gicr->gicr_rd_size)) { + *offset = addr - gicr->gicr_rd_base; + *type = TYPE_GIC_GICR_RD; + return vgic->gicr[i]; + } + } + + *type = TYPE_GIC_INVAILD; + return NULL; +} + +int vgicv3_state_load(struct z_vcpu *vcpu, struct gicv3_vcpuif_ctxt *ctxt) +{ + vgicv3_lrs_load(ctxt); + vgicv3_prios_load(ctxt); + vgicv3_ctrls_load(ctxt); + + arch_vdev_irq_enable(vcpu); + return 0; +} + +int vgicv3_state_save(struct z_vcpu *vcpu, struct gicv3_vcpuif_ctxt *ctxt) +{ + vgicv3_lrs_save(ctxt); + vgicv3_prios_save(ctxt); + vgicv3_ctrls_save(ctxt); + + arch_vdev_irq_disable(vcpu); + return 0; +} + +int vcpu_gicv3_init(struct gicv3_vcpuif_ctxt *ctxt) +{ + + ctxt->icc_sre_el1 = 0x07; + ctxt->icc_ctlr_el1 = read_sysreg(ICC_CTLR_EL1); + + ctxt->ich_vmcr_el2 = GICH_VMCR_VENG1 | GICH_VMCR_DEFAULT_MASK; + ctxt->ich_hcr_el2 = GICH_HCR_EN; + + return 0; +} \ No newline at end of file diff --git a/subsys/zvm/vdev/virt_psci.c b/subsys/zvm/vdev/virt_psci.c new file mode 100644 index 00000000000000..ca6055d2a76434 --- /dev/null +++ b/subsys/zvm/vdev/virt_psci.c @@ -0,0 +1,188 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include <../drivers/pm_cpu_ops/pm_cpu_ops_psci.h> +#include +#include +#include + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +static uint32_t psci_get_function_id(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) { + uint64_t reg_value = *find_index_reg(0, arch_ctxt); + return reg_value & ~((uint32_t)0); +} + +static void psci_system_off(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) +{ + zvm_shutdown_guest(vcpu->vm); +} + +static void psci_system_reset(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) +{ + zvm_reboot_guest(vcpu->vm); +} + +static inline void psci_set_reg(uint32_t psci_fn, struct z_vcpu *vcpu, + arch_commom_regs_t *arch_ctxt, + uint32_t reg, unsigned long val) +{ + uint64_t *reg_value; + reg_value = find_index_reg(reg, arch_ctxt); + *reg_value = (uint64_t)val; +} + +uint64_t psci_vcpu_suspend(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) +{ + return PSCI_RET_SUCCESS; +} + +uint64_t psci_vcpu_off(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) +{ + return PSCI_RET_SUCCESS; +} + +uint64_t psci_vcpu_affinity_info(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) +{ + return PSCI_RET_SUCCESS; +} + +uint64_t psci_vcpu_migration(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt) +{ + ZVM_LOG_WARN("PSCI_0_2_FN_MIGRATE\n"); + ZVM_LOG_WARN("do not support now! 
\n");
+	return -1;
+}
+
+uint64_t psci_vcpu_migration_info_type(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt)
+{
+	return PSCI_0_2_TOS_MP;
+}
+
+uint64_t psci_vcpu_other(unsigned long psci_func)
+{
+	ZVM_LOG_WARN("PSCI_0_2_FN_OTHER: %lx \n", psci_func);
+	ZVM_LOG_WARN("Not supported yet! \n");
+	return -1;
+}
+
+uint64_t psci_vcpu_on(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt)
+{
+	uint64_t cpu_id;
+	uint64_t context_id;
+	uint64_t target_pc;
+	struct zvm_vcpu_context *ctxt;
+	struct z_vm *vm = vcpu->vm;
+
+	cpu_id = arch_ctxt->esf_handle_regs.x1;
+	target_pc = arch_ctxt->esf_handle_regs.x2;
+	context_id = arch_ctxt->esf_handle_regs.x3;
+
+	/* cpu_id is guest-controlled and must be validated before indexing
+	 * the vcpus array. PSCI_RET_INVALID_PARAMS is the standard PSCI 0.2
+	 * error code, assumed defined alongside the other PSCI_RET_* macros.
+	 */
+	if (cpu_id >= vm->vcpu_num) {
+		return PSCI_RET_INVALID_PARAMS;
+	}
+	vcpu = vm->vcpus[cpu_id];
+
+	ctxt = &vcpu->arch->ctxt;
+	ctxt->regs.pc = target_pc;
+
+	vm_vcpu_ready(vcpu);
+	return PSCI_RET_SUCCESS;
+}
+
+/*
+ * x0: function_id
+ * x1-x3: psci function args
+ * x0-x4: ret
+*/
+static int psci_0_2_call(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt)
+{
+	uint32_t psci_fn = psci_get_function_id(vcpu, arch_ctxt);
+	uint32_t val;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = psci_vcpu_suspend(vcpu, arch_ctxt);
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		psci_vcpu_off(vcpu, arch_ctxt);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		val = psci_vcpu_on(vcpu, arch_ctxt);
+		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = psci_vcpu_affinity_info(vcpu, arch_ctxt);
+		break;
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE:
+		val = psci_vcpu_migration(vcpu, arch_ctxt);
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+		 * or
+		 * Trusted OS is not present
+		 */
+		val = psci_vcpu_migration_info_type(vcpu, arch_ctxt);
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		psci_system_off(vcpu, arch_ctxt);
+		/*
+		 * We shouldn't be going back to the guest VCPU after
+		 * receiving a SYSTEM_OFF request.
+		 *
+		 * If we accidentally resume the guest VCPU after SYSTEM_OFF,
+		 * the guest VCPU should see an internal failure in the PSCI
+		 * return value. To achieve this, we preload r0 (or x0) with
+		 * the PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		psci_system_reset(vcpu, arch_ctxt);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with the PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		break;
+	default:
+		ZVM_LOG_INFO("Unknown PSCI function ID\n");
+		return -1;
+	}
+
+	if (val != PSCI_RET_INTERNAL_FAILURE) {
+		psci_set_reg(psci_fn, vcpu, arch_ctxt, 0, val);
+	}
+
+	return 0;
+}
+
+int do_psci_call(struct z_vcpu *vcpu, arch_commom_regs_t *arch_ctxt)
+{
+	if (!vcpu || !arch_ctxt) {
+		return -1;
+	}
+
+	/* TODO: support psci-1.0 */
+	return psci_0_2_call(vcpu, arch_ctxt);
+}
\ No newline at end of file
diff --git a/subsys/zvm/vdev/vpl011.c b/subsys/zvm/vdev/vpl011.c
new file mode 100644
index 00000000000000..6481e7629c84da
--- /dev/null
+++ b/subsys/zvm/vdev/vpl011.c
@@ -0,0 +1,263 @@
+
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Yuhao Hu, Qingqiao Wang and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +#define DEV_DATA(dev) \ + ((struct virt_device_data *)(dev)->data) + +static const struct virtual_device_instance *serial_virtual_device_instance; + +static int vpl011_regs_init(struct z_vm *vm, struct virt_pl011 *pl011, uint32_t serial_base, uint32_t serial_size) +{ + + struct virt_pl011 *vpl011 = pl011; + uint8_t pl011_id[8] = ARM_PL011_ID; + int i; + + vpl011->vm = vm; + vpl011->vserial_base = serial_base; + vpl011->vserial_size = serial_size; + vpl011->vserial_reg_base = (uint32_t *)k_malloc(vpl011->vserial_size); + + /* check serial device */ + if(!vpl011->vserial_reg_base) { + ZVM_LOG_ERR("vpl011 device has init error!"); + return -ENODEV; + } + memset(vpl011->vserial_reg_base, 0, serial_size); + + /* Init spinlock */ + ZVM_SPINLOCK_INIT(&vpl011->vserial_lock); + VPL011_REGS(vpl011)->fr = 0x90; + VPL011_REGS(vpl011)->cr = 0x30; + VPL011_REGS(vpl011)->ifls = 0x12; + for (i = 0; i < 8; i++) { + VPL011_REGS(vpl011)->id[i] = pl011_id[i]; + } + + return 0; +} + +static int vserial_vdev_mem_read(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size) +{ + uint32_t offset; + uint32_t *v; + struct virt_pl011 *vs = (struct virt_pl011 *)vdev->priv_vdev; + + *value = 0; + vs->set_irq = false; + v = (uint32_t *)value; + offset = addr - vs->vserial_base; + if (offset == 0) { + VDEV_REGS(vdev)->fr &= ~VPL011_FR_RXFF; + VDEV_REGS(vdev)->ris &= ~VPL011_INT_RX; + vs->level = VDEV_REGS(vdev)->ris; + vs->enabled = VDEV_REGS(vdev)->imsc; + + struct K_fifo_data *rdata = k_fifo_get(&vs->rx_fifo, K_NO_WAIT); + vs->count--; + if (vs->count>0) + { + VDEV_REGS(vdev)->ris |= VPL011_INT_RX; + vs->level = VDEV_REGS(vdev)->ris; + }else + { + VDEV_REGS(vdev)->fr |= VPL011_FR_RXFE; + } + *v = rdata->data[0]; + }else if (offset == 0x40) { + *v = VDEV_REGS(vdev)->ris & VDEV_REGS(vdev)->imsc; + }else if (offset >= 0xfe0 && offset < 0x1000) { + *v = VDEV_REGS(vdev)->id[(offset - 0xfe0)>>2]; + } + else { + *v = vserial_sysreg_read32(vs->vserial_reg_base, offset); + } + + return 0; +} + +static int vserial_vdev_mem_write(struct z_virt_dev *vdev, uint64_t addr, uint64_t *value, uint16_t size) +{ + uint32_t offset; + struct virt_pl011 *vs = (struct virt_pl011 *)vdev->priv_vdev; + uint32_t *v = (uint32_t *)value; + + vs->set_irq = false; + offset = addr - vs->vserial_base; + vserial_sysreg_write32(*v, vs->vserial_reg_base, offset); + + if (offset==0) { + VDEV_REGS(vdev)->ris |= VPL011_INT_TX; + vs->level = VDEV_REGS(vdev)->ris; + vs->enabled = VDEV_REGS(vdev)->imsc; + + VDEV_REGS(vdev)->dr = 0x00; + if (vs->connecting) { + uart_poll_out_to_host((unsigned char)*v); + } + }else if (offset==0x38) { + vs->level = VDEV_REGS(vdev)->ris; + vs->enabled = VDEV_REGS(vdev)->imsc; + }else if (offset==0x44 ) { + VDEV_REGS(vdev)->imsc &= ~VDEV_REGS(vdev)->icr; + VDEV_REGS(vdev)->ris &= ~VDEV_REGS(vdev)->icr; + } + + return 0; +} + +static int pl011_virt_serial_send(struct virt_serial *serial, unsigned char *data,int len) +{ + struct virt_pl011 *vpl011 =(struct virt_pl011 *)get_virt_serial_device(serial); + int using_buffer_index = vpl011->count; + + for (size_t i = 0; i < len; i++) { + serial->send_buffer[i+using_buffer_index].data[0]= data[i]; + k_fifo_put(&vpl011->rx_fifo,&serial->send_buffer[i+using_buffer_index]); + vpl011->count++; + } + VPL011_REGS(vpl011)->fr &= ~PL011_FLAG_RXFE; + if 
(VPL011_REGS(vpl011)->cr & 0x10 || vpl011->count == FIFO_SIZE) {
+		VPL011_REGS(vpl011)->fr |= PL011_FLAG_RXFF;
+	}
+	/* Data is queued: raise the RX interrupt. */
+	if (vpl011->count > 0) {
+		VPL011_REGS(vpl011)->ris |= PL011_INT_RX;
+		vpl011->set_irq = true;
+		vpl011->level = VPL011_REGS(vpl011)->ris;
+		vpl011->enabled = VPL011_REGS(vpl011)->imsc;
+	}
+	return 0;
+}
+
+static int vm_virt_serial_init(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc)
+{
+	ARG_UNUSED(dev);
+	ARG_UNUSED(vdev_desc);
+	char name[64];
+	int ret;
+	uint32_t serial_base, serial_size, virq;
+	struct z_virt_dev *virt_dev;
+	struct virt_pl011 *vpl011;
+
+	serial_base = VSERIAL_REG_BASE;
+	serial_size = VSERIAL_REG_SIZE;
+
+	/* check serial device */
+	if (!serial_base || !serial_size) {
+		ZVM_LOG_ERR("vserial device init error!");
+		return -ENODEV;
+	}
+
+	/* Init virtual device for vm. */
+	virq = VSERIAL_HIRQ_NUM;
+	virt_dev = vm_virt_dev_add(vm, TOSTRING(VIRT_SERIAL_NAME), false, false, serial_base,
+			serial_base, serial_size, virq, virq);
+	if (!virt_dev) {
+		ZVM_LOG_ERR("Allocate virtual device for vserial error \n");
+		return -ENODEV;
+	}
+	/* Init the emulated pl011 state for the virtual device. */
+	vpl011 = (struct virt_pl011 *)k_malloc(sizeof(struct virt_pl011));
+	if (!vpl011) {
+		ZVM_LOG_ERR("Allocate memory for vpl011 error \n");
+		return -ENODEV;
+	}
+
+	ret = vpl011_regs_init(vm, vpl011, serial_base, serial_size);
+	if (ret) {
+		ZVM_LOG_ERR("Init virt serial error \n");
+		k_free(vpl011);
+		return -ENODEV;
+	}
+	vpl011->irq = virq;
+	vpl011->count = 0;
+	k_fifo_init(&vpl011->rx_fifo);
+
+	/* init the virt_serial abstract instance; snprintk guarantees termination */
+	snprintk(name, sizeof(name), "%s/vpl011", vm->vm_name);
+
+	vpl011->vserial = virt_serial_create(name, &pl011_virt_serial_send, vpl011);
+	vm_device_irq_init(vm, virt_dev);
+
+	virt_dev->priv_data = serial_virtual_device_instance;
+	virt_dev->priv_vdev = vpl011;
+	return 0;
+}
+
+static int vm_virt_serial_deinit(const struct device *dev, struct z_vm *vm, struct z_virt_dev *vdev_desc)
+{
+	ARG_UNUSED(dev);
+	int ret = 0;
+	struct virt_pl011 *vpl011;
+
+	vpl011 = (struct virt_pl011 *)vdev_desc->priv_vdev;
+	if (vpl011->vserial) {
+		virt_serial_destroy(vpl011->vserial);
+	}
+	k_free(vpl011);
+
+	vdev_desc->priv_data = NULL;
+	vdev_desc->priv_vdev = NULL;
+	ret = vm_virt_dev_remove(vm, vdev_desc);
+	return ret;
+}
+
+static int virt_pl011_init(void)
+{
+	int i;
+	for (i = 0; i < zvm_virtual_devices_count_get(); i++) {
+		const struct virtual_device_instance *virtual_device = zvm_virtual_device_get(i);
+		if (strcmp(virtual_device->name, TOSTRING(VIRT_SERIAL_NAME))) {
+			continue;
+		}
+		DEV_DATA(virtual_device)->vdevice_type |= VM_DEVICE_PRE_KERNEL_1;
+		serial_virtual_device_instance = virtual_device;
+		break;
+	}
+
+	return 0;
+}
+
+static struct virt_device_config virt_pl011_cfg = {
+	.hirq_num = 0,
+	.device_config = NULL,
+};
+
+static struct virt_device_data virt_pl011_data_port = {
+	.device_data = NULL,
+};
+
+/**
+ * @brief vserial device operations API.
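+ * The read/write hooks emulate the PL011 register file that
+ * vpl011_regs_init() set up for the guest.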
+*/ +static const struct virt_device_api virt_pl011_api = { + .init_fn = vm_virt_serial_init, + .deinit_fn = vm_virt_serial_deinit, + .virt_device_read = vserial_vdev_mem_read, + .virt_device_write = vserial_vdev_mem_write, +}; + +ZVM_VIRTUAL_DEVICE_DEFINE(virt_pl011_init, + POST_KERNEL, CONFIG_VM_VSERIAL_INIT_PRIORITY, + VIRT_SERIAL_NAME, + virt_pl011_data_port, + virt_pl011_cfg, + virt_pl011_api); \ No newline at end of file diff --git a/subsys/zvm/vdev/vserial.c b/subsys/zvm/vdev/vserial.c new file mode 100644 index 00000000000000..714cef54ce2ab5 --- /dev/null +++ b/subsys/zvm/vdev/vserial.c @@ -0,0 +1,251 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Yuhao Hu, Qingqiao Wang and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +extern struct zvm_manage_info *zvm_overall_info; + +static struct z_virt_serial_ctrl virt_serial_ctrl; +static struct k_thread tx_it_emulator_thread_data; + +K_SEM_DEFINE(connect_vm_sem, 0, 1); +K_THREAD_STACK_DEFINE(tx_it_emulator_thread_stack, 1024); + +uint32_t virt_serial_count(void) +{ + uint32_t retval = 0; + struct virt_serial *vs; + + k_mutex_lock(&virt_serial_ctrl.virt_serial_list_lock, K_FOREVER); + + sys_dnode_t *vserial_node ; + + SYS_DLIST_FOR_EACH_NODE(&virt_serial_ctrl.virt_serial_list, vserial_node) { + vs = CONTAINER_OF(vserial_node, struct virt_serial, node); + printk("[%d]serial name:%s ,vmid:%d\n",retval,vs->name,((struct z_vm *)vs->vm)->vmid); + retval++; + } + k_mutex_unlock(&virt_serial_ctrl.virt_serial_list_lock); + + return retval; +} + + +struct virt_serial* get_vserial(uint8_t vmid) +{ + struct virt_serial *serial = NULL; + struct virt_serial *tmpserial ; + sys_dnode_t *vserial_node; + SYS_DLIST_FOR_EACH_NODE(&virt_serial_ctrl.virt_serial_list, vserial_node) { + tmpserial = CONTAINER_OF(vserial_node, struct virt_serial , node); + if (((struct z_vm *)(tmpserial->vm))->vmid == vmid) { + serial = tmpserial; + } + + } + if (serial == NULL) { + printk("No virtual serial devices[vmid:%d]\n",vmid); + return NULL; + } + + return serial; +} + +struct virt_serial *virt_serial_create(const char *name, + int (*send) (struct virt_serial *, unsigned char *, int ), + void *priv) +{ + bool found = false; + struct virt_serial *vserial = NULL; + struct virt_pl011 *v_s; + sys_dnode_t *vserial_node; + + if (!name) { + return NULL; + } + + k_mutex_lock(&virt_serial_ctrl.virt_serial_list_lock, K_FOREVER); + SYS_DLIST_FOR_EACH_NODE(&virt_serial_ctrl.virt_serial_list, vserial_node) { + vserial = CONTAINER_OF(vserial_node, struct virt_serial, node); + if (strcmp(name, vserial->name) == 0) { + found = true; + break; + } + } + + if (found) { + k_mutex_unlock(&virt_serial_ctrl.virt_serial_list_lock); + v_s = (struct virt_pl011 *)priv; + vserial->send = send; + vserial->vm = v_s->vm; + vserial->priv = priv; + vserial->count=0; + return vserial; + } + + vserial = k_calloc(1, (sizeof(struct virt_serial))); + if (!vserial) { + k_mutex_unlock(&virt_serial_ctrl.virt_serial_list_lock); + return NULL; + } + vserial->count=0; + + if (strlen(name) >= sizeof(vserial->name)) { + k_free(vserial); + k_mutex_unlock(&virt_serial_ctrl.virt_serial_list_lock); + return NULL; + }else { + strncpy(vserial->name, name, sizeof(vserial->name)); + } + + v_s = (struct virt_pl011 *)priv; + vserial->send = send; + vserial->vm = v_s->vm; + vserial->priv = priv; + sys_dnode_init(&vserial->node); 
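+	/* Publish the new serial on the global list so the shell
+	 * (see switch_virtual_serial_handler) can look it up by vmid.
+	 */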
+ sys_dlist_append(&virt_serial_ctrl.virt_serial_list, &vserial->node); + k_mutex_unlock(&virt_serial_ctrl.virt_serial_list_lock); + ZVM_LOG_INFO("Create virt_serial:%s for %s\n",name, v_s->vm->vm_name); + + return vserial; +} + +int virt_serial_destroy(struct virt_serial *vserial) +{ + const struct shell *vs_shell = shell_backend_uart_get_ptr(); + vs_shell->ctx->bypass = NULL; + sys_dlist_remove(&vserial->node); + k_free(vserial); + + return 0; +} + +static void vserial_it_emulator_thread(void *ctrl, void *arg2, void *arg3) +{ + struct virt_pl011 *vpl011; + while (1) { + k_sem_take(&connect_vm_sem, K_FOREVER); + vpl011 = (struct virt_pl011 *)virt_serial_ctrl.connecting_virt_serial->priv; + while (virt_serial_ctrl.connecting) { + if (vpl011->enabled & vpl011->level) { + set_virq_to_vm(virt_serial_ctrl.connecting_virt_serial->vm,vpl011->irq); + } + k_sleep(K_MSEC(1)); + } + } +} + +static void init_vserial_it_emulator_thread(void) +{ + k_tid_t tid; + tid = k_thread_create(&tx_it_emulator_thread_data, tx_it_emulator_thread_stack, + K_THREAD_STACK_SIZEOF(tx_it_emulator_thread_stack), + vserial_it_emulator_thread, NULL, NULL, NULL, + K_PRIO_COOP(7), 0, K_NO_WAIT); + k_thread_name_set(tid, "vserial_it_emulator"); +} + +static int virt_serial_ctrl_init(void) +{ + memset(&virt_serial_ctrl, 0, sizeof(virt_serial_ctrl)); + + k_mutex_init(&virt_serial_ctrl.virt_serial_list_lock); + sys_dlist_init(&virt_serial_ctrl.virt_serial_list); + init_vserial_it_emulator_thread(); + + return 0; +} + +SYS_INIT(virt_serial_ctrl_init, POST_KERNEL, CONFIG_VIRT_SERIAL_CTRL_INIT_PRIORITY); + + + +void uart_poll_out_to_host(unsigned char data) +{ + const struct shell *vs_shell = shell_backend_uart_get_ptr(); + const struct device *dev=((struct shell_uart_common *)vs_shell->iface->ctx)->dev; + uart_poll_out(dev,data); +} + +void transfer(const struct shell *vs_shell, uint8_t *data, size_t len) +{ + uint8_t *rdata; + + if (data[0] == EXIT_VSERIAL_KEY) { + shell_set_bypass(vs_shell, NULL); + ((struct virt_pl011 *)(virt_serial_ctrl.connecting_virt_serial->priv))->connecting= false; + virt_serial_ctrl.connecting = false; + virt_serial_ctrl.connecting_vm_id = 0; + rdata = k_malloc(sizeof(uint8_t)); + *rdata = '\n'; + uart_poll_out_to_host('\n'); + }else { + virt_serial_ctrl.connecting_virt_serial->send(virt_serial_ctrl.connecting_virt_serial, data, len); + } +} + +int switch_virtual_serial_handler(const struct shell *vs_shell, size_t argc, char **argv) +{ + uint8_t id; + uint8_t *data; + struct virt_serial *serial; + struct shell_uart_int_driven *shell_uart; + + if (argc > 1) { + if (argv[1][1]!='\0') { + ZVM_LOG_WARN("Only supports VM ID with a length of 1.\n"); + return 0; + } + id = *argv[1]; + if (id > '9' || id < '0') { + ZVM_LOG_WARN("Invalid VM ID %c\n",id); + return 0; + } + id = id - 48; + if (id > CONFIG_MAX_VM_NUM - 1) { + ZVM_LOG_WARN("Max VM ID is %d\n", CONFIG_MAX_VM_NUM - 1); + return 0; + } + if (!(BIT(id) & zvm_overall_info->alloced_vmid)) { + ZVM_LOG_WARN("VM ID %d not alloced\n",id); + return 0; + } else { + serial = get_vserial(id); + ((struct virt_pl011 *)(serial->priv))->connecting = true; + virt_serial_ctrl.connecting = true; + virt_serial_ctrl.connecting_vm_id = id; + virt_serial_ctrl.connecting_virt_serial = serial; + } + + if (virt_serial_ctrl.connecting) { + shell_set_bypass(vs_shell, transfer); + k_sem_give(&connect_vm_sem); + data = k_malloc(sizeof(uint8_t)); + *data = '\r'; + shell_uart = (struct shell_uart_int_driven *)vs_shell->iface->ctx; + ring_buf_put(&shell_uart->rx_ringbuf, data, 1); + 
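+			/* Kick the shell: one CR in the RX ring buffer makes the shell
+			 * thread run and enter the newly installed bypass handler.
+			 */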
shell_fprintf(vs_shell, SHELL_VT100_COLOR_YELLOW, "Connecting VM ID:%d\n", id); + } + } else { + ZVM_LOG_INFO("Reachable virtual serial:\n"); + virt_serial_count(); + } + + return 0; +} diff --git a/subsys/zvm/vm.c b/subsys/zvm/vm.c new file mode 100644 index 00000000000000..ec737dc7839c59 --- /dev/null +++ b/subsys/zvm/vm.c @@ -0,0 +1,522 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../lib/posix/options/getopt/getopt.h" + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +extern struct zvm_manage_info *zvm_overall_info; + +static int intra_vm_msg_handler(struct z_vm *vm) +{ + ARG_UNUSED(vm); + struct z_vcpu *vcpu = _current_vcpu; + if (!vcpu) { + ZVM_LOG_WARN("Get current vcpu failed! \n"); + return -ENODEV; + } + set_virq_to_vcpu(vcpu, vcpu->virq_block.pending_sgi_num); + + return 0; +} + +static int pause_vm_handler(struct z_vm *vm) +{ + ARG_UNUSED(vm); + return 0; +} + +static int stop_vm_handler(struct z_vm *vm) +{ + ARG_UNUSED(vm); + return 0; +} + +static void list_vm_info(uint16_t vmid) +{ + char *vm_ss; + int mem_size = 0; + struct z_vm *vm = zvm_overall_info->vms[vmid]; + + if (!vm) { + ZVM_LOG_WARN("Invalid vmid!\n"); + return; + } + + /* Get the vm's status */ + switch (vm->vm_status) { + case VM_STATE_RUNNING: + vm_ss = "running"; + break; + case VM_STATE_PAUSE: + vm_ss = "pausing"; + break; + case VM_STATE_NEVER_RUN: + vm_ss = "Ready"; + break; + case VM_STATE_HALT: + vm_ss = "stopping"; + break; + case VM_STATE_RESET: + vm_ss = "reset"; + break; + default: + ZVM_LOG_WARN("This vm status is invalid!\n"); + return; + } + + mem_size = vm->os->info.vm_mem_size / (1024*1024); + printk("|***%d %s\t%d\t%d \t%s ***| \n", vm->vmid, + vm->vm_name, vm->vcpu_num, mem_size, vm_ss); + +} + +static void list_all_vms_info(void) +{ + uint16_t i; + + printk("\n|******************** All VMS INFO *******************|\n"); + printk("|***vmid name \t vcpus vmem(M)\tstatus ***|\n"); + for(i = 0; i < CONFIG_MAX_VM_NUM; i++){ + if(BIT(i) & zvm_overall_info->alloced_vmid) + list_vm_info(i); + } +} + +int vm_ipi_handler(struct z_vm *vm) +{ + int ret; + uint32_t vm_status; + + vm_status = vm->vm_status; + switch (vm_status) { + case VM_STATE_RUNNING: + ret = intra_vm_msg_handler(vm); + break; + case VM_STATE_PAUSE: + ret = pause_vm_handler(vm); + break; + case VM_STATE_HALT: + ret = stop_vm_handler(vm); + break; + default: + ret = -1; + break; + } + + return ret; +} + +int vm_mem_init(struct z_vm *vm) +{ + int ret = 0; + struct vm_mem_domain *vmem_dm = vm->vmem_domain; + + if (vmem_dm->is_init) { + ZVM_LOG_WARN("VM's mem has been init before! \n"); + return -ENXIO; + } + + ret = vm_mem_domain_partitions_add(vmem_dm); + if (ret) { + ZVM_LOG_WARN("Add partition to domain failed!, Code: %d \n", ret); + return ret; + } + + return 0; +} + +int vm_create(struct z_os_info *vm_info, struct z_vm *new_vm) +{ + int ret = 0; + struct z_vm *vm = new_vm; + + /* init vmid here, this vmid is for vm level*/ + vm->vmid = allocate_vmid(vm_info); + if (vm->vmid >= CONFIG_MAX_VM_NUM) { + return -EOVERFLOW; + } + /* init vm*/ + vm->reboot = false; + + vm->os = (struct z_os *)k_malloc(sizeof(struct z_os)); + if (!vm->os) { + ZVM_LOG_WARN("Allocate memory for os error! \n"); + return -ENOMEM; + } + + ret = vm_os_create(vm->os, vm_info); + if (ret) { + ZVM_LOG_WARN("Unknow os type! 
\n"); + return ret; + } + + ret = vm_mem_domain_create(vm); + if (ret) { + ZVM_LOG_WARN("vm_mem_domain_create failed! \n"); + return ret; + } + + ret = vm_vcpus_create(vm_info->vcpu_num, vm); + if (ret) { + ZVM_LOG_WARN("vm_vcpus_create failed!"); + return ret; + } + + vm->arch = (struct vm_arch *)k_malloc(sizeof(struct vm_arch)); + if (!vm->arch) { + ZVM_LOG_WARN("Allocate mm memory for vm arch struct failed!"); + return -ENXIO; + } + + vm->ops = (struct zvm_ops *)k_malloc(sizeof(struct zvm_ops)); + if (!vm->ops) { + ZVM_LOG_WARN("Allocate mm memory for vm ops struct failed!"); + return -ENXIO; + } + + vm->vm_vcpu_id_count.count = 0; + ZVM_SPINLOCK_INIT(&vm->vm_vcpu_id_count.vcpu_id_lock); + ZVM_SPINLOCK_INIT(&vm->spinlock); + + char vmid_str[4]; + uint16_t vmid_str_len = sprintf(vmid_str, "-%d", vm->vmid); + if (vmid_str_len > 4) { + ZVM_LOG_WARN("Sprintf put error, may cause str overflow!\n"); + vmid_str[3] = '\0'; + } else { + vmid_str[vmid_str_len] = '\0'; + } + + if (strcpy(vm->vm_name, vm->os->name) == NULL || strcat(vm->vm_name, vmid_str) == NULL) { + ZVM_LOG_WARN("VM name init error! \n"); + return -EIO; + } + + /* set vm status here */ + vm->vm_status = VM_STATE_NEVER_RUN; + + /* store zvm overall info */ + zvm_overall_info->vms[vm->vmid] = vm; + vm->arch->vm_pgd_base = (uint64_t) + vm->vmem_domain->vm_mm_domain->arch.ptables.base_xlat_table; + + return 0; +} + +int vm_ops_init(struct z_vm *vm) +{ + /* According to OS type to bind vm_ops. We need to add operation func@TODO*/ + return 0; +} + +static uint16_t get_vmid_by_id(size_t argc, char **argv) +{ + uint16_t vm_id = CONFIG_MAX_VM_NUM; + int opt; + char *optstring = "t:n:"; + struct getopt_state *state; + + /* Initialize the global state */ + getopt_init(); + /* Get Current getopt_state */ + state = getopt_state_get(); + + while ((opt = getopt(argc, argv, optstring)) != -1) { + switch (opt) { + case 'n': + vm_id = (uint16_t)(state->optarg[0] - '0'); + break; + default: + ZVM_LOG_WARN("Input number invalid, Please input a valid vmid after \"-n\" command! \n"); + return -EINVAL; + } + } + return vm_id; +} + +int vm_vcpus_create(uint16_t vcpu_num, struct z_vm *vm) +{ + /* init vcpu num */ + if (vcpu_num > CONFIG_MAX_VCPU_PER_VM) { + vcpu_num = CONFIG_MAX_VCPU_PER_VM; + ZVM_LOG_WARN("Vcpu num is too big, set it to max vcpu num: %d\n", vcpu_num); + } + vm->vcpu_num = vcpu_num; + + /* allocate vcpu list here */ + vm->vcpus = (struct z_vcpu **)k_malloc(vcpu_num * sizeof(struct z_vcpu *)); + if (!(vm->vcpus)) { + ZVM_LOG_WARN("Vcpus struct init error !\n"); + return -ENXIO; + } + + vm->vcpu_exit_sem = (struct k_sem *)k_malloc(sizeof(struct k_sem)); + if(!(vm->vcpu_exit_sem)) { + ZVM_LOG_WARN("Vcpu exit sem init error! \n"); + return -ENXIO; + } + + return 0; +} + +int vm_vcpus_init(struct z_vm *vm) +{ + char vcpu_name[VCPU_NAME_LEN]; + int i; + struct z_vcpu *vcpu; + + if (vm->vcpu_num > CONFIG_MAX_VCPU_PER_VM) { + ZVM_LOG_WARN("Vcpu counts is too big!"); + return -ESRCH; + } + + for(i = 0; i < vm->vcpu_num; i++){ + memset(vcpu_name, 0, VCPU_NAME_LEN); + snprintk(vcpu_name, VCPU_NAME_LEN - 1, "%s-vcpu%d", vm->vm_name, i); + + vcpu = vm_vcpu_init(vm, i, vcpu_name); + if(!vcpu) { + ZVM_LOG_WARN("Vcpu-%d init failed! 
\n", i); + return -ENODEV; + } + + sys_dlist_init(&vcpu->vcpu_lists); + vm->vcpus[i] = vcpu; + vcpu->next_vcpu = NULL; + if (i) { + vm->vcpus[i-1]->next_vcpu = vcpu; + } + + k_sem_init(&vm->vcpu_exit_sem[i], 0, 1); + + vcpu->is_poweroff = true; + if (i == 0) { + vcpu->is_poweroff = false; + } + } + + return 0; +} + +int vm_vcpus_ready(struct z_vm *vm) +{ + uint16_t i=0; + struct z_vcpu *vcpu; + struct k_thread *thread; + k_spinlock_key_t key; + ARG_UNUSED(thread); + + key = k_spin_lock(&vm->spinlock); + for(i = 0; i < vm->vcpu_num; i++){ + /* find the vcpu struct */ + vcpu = vm->vcpus[i]; + if (vcpu == NULL) { + ZVM_LOG_WARN("vm error here, can't find vcpu: vcpu-%d", i); + k_spin_unlock(&vm->spinlock, key); + return -ENODEV; + } + if (!vcpu->is_poweroff) { + vm_vcpu_ready(vcpu); + } + } + vm->vm_status = VM_STATE_RUNNING; + k_spin_unlock(&vm->spinlock, key); + + return 0; +} + +int vm_vcpus_pause(struct z_vm *vm) +{ + uint16_t i=0; + struct z_vcpu *vcpu; + k_spinlock_key_t key; + + key = k_spin_lock(&vm->spinlock); + for(i = 0; i < vm->vcpu_num; i++){ + vcpu = vm->vcpus[i]; + if (vcpu == NULL) { + ZVM_LOG_WARN("Pause vm error here, can't find vcpu: vcpu-%d \n", i); + k_spin_unlock(&vm->spinlock, key); + return -ENODEV; + } + vm_vcpu_pause(vcpu); + } + + vm->vm_status = VM_STATE_PAUSE; + k_spin_unlock(&vm->spinlock, key); + return 0; +} + +int vm_vcpus_halt(struct z_vm *vm) +{ + uint16_t i=0; + struct z_vcpu *vcpu; + struct k_thread *thread; + k_spinlock_key_t key; + ARG_UNUSED(thread); + + key = k_spin_lock(&vm->spinlock); + for(i = 0; i < vm->vcpu_num; i++){ + /* find the vcpu struct */ + vcpu = vm->vcpus[i]; + if (vcpu == NULL) { + ZVM_LOG_WARN("vm error here, can't find vcpu: vcpu-%d", i); + k_spin_unlock(&vm->spinlock, key); + return -ENODEV; + } + vm_vcpu_halt(vcpu); + } + vm->vm_status = VM_STATE_HALT; + k_spin_unlock(&vm->spinlock, key); + + return 0; +} + +int vm_vcpus_reset(struct z_vm *vm) +{ + uint16_t i=0; + struct z_vcpu *vcpu; + k_spinlock_key_t key; + + key = k_spin_lock(&vm->spinlock); + for(i = 0; i < vm->vcpu_num; i++) { + vcpu = vm->vcpus[i]; + if (vcpu == NULL) { + ZVM_LOG_WARN("Pause vm error here, can't find vcpu: vcpu-%d \n", i); + k_spin_unlock(&vm->spinlock, key); + return -ENODEV; + } + vm_vcpu_reset(vcpu); + } + + vm->vm_status = VM_STATE_RESET; + load_os_image(vm); + k_spin_unlock(&vm->spinlock, key); + return 0; +} + +int vm_delete(struct z_vm *vm) +{ + int ret = 0; + k_spinlock_key_t key; + struct vm_mem_domain *vmem_dm = vm->vmem_domain; + struct z_vcpu *vcpu; + + key = k_spin_lock(&vm->spinlock); + /* delete vdev struct */ + ret = vm_device_deinit(vm); + if (ret) { + ZVM_LOG_WARN("Delete vm devices failed! \n"); + } + + /* remove all the partition in the vmem_domain */ + ret = vm_mem_apart_remove(vmem_dm); + if(ret) { + ZVM_LOG_WARN("Delete vm mem domain failed! 
\n");
+	}
+
+	/* delete vcpu struct */
+	for (int i = 0; i < vm->vcpu_num; i++) {
+		vcpu = vm->vcpus[i];
+		if (!vcpu) {
+			continue;
+		}
+		vm_vcpu_deinit(vcpu);
+	}
+
+	if (vm->vcpu_exit_sem) {
+		k_free(vm->vcpu_exit_sem);
+	}
+
+	k_free(vm->ops);
+	k_free(vm->arch);
+	k_free(vm->vcpus);
+	k_free(vm->vmem_domain);
+	if (vm->os->name) {
+		k_free(vm->os->name);
+	}
+	k_free(vm->os);
+
+	/* Cache the vmid and release vm->spinlock before freeing the vm
+	 * struct, so neither the lock nor the vmid is touched after k_free().
+	 */
+	uint16_t vmid = vm->vmid;
+	zvm_overall_info->vms[vmid] = NULL;
+	k_spin_unlock(&vm->spinlock, key);
+	k_free(vm);
+
+	zvm_overall_info->vm_total_num--;
+	zvm_overall_info->alloced_vmid &= ~BIT(vmid);
+	return 0;
+}
+
+int z_parse_new_vm_args(size_t argc, char **argv, struct z_os_info *vm_info,
+			struct z_vm *vm)
+{
+	int ret = 0;
+	int opt;
+	char *optstring = "t:n:";
+	struct getopt_state *state;
+
+	getopt_init();
+	state = getopt_state_get();
+
+	while ((opt = getopt(argc, argv, optstring)) != -1) {
+		switch (opt) {
+		case 't':
+			ret = get_os_info_by_type(vm_info);
+			continue;
+		case 'n':
+			/* TODO: support allocating a vmid chosen by the user later */
+			/* fall through */
+		default:
+			ZVM_LOG_WARN("Input error! \n");
+			ZVM_LOG_WARN("Please use the \"zvm new -t <os_name>\" command to create a vm! \n");
+			return -EINVAL;
+		}
+	}
+
+	return ret;
+}
+
+int z_parse_run_vm_args(size_t argc, char **argv)
+{
+	return get_vmid_by_id(argc, argv);
+}
+
+int z_parse_pause_vm_args(size_t argc, char **argv)
+{
+	return get_vmid_by_id(argc, argv);
+}
+
+int z_parse_delete_vm_args(size_t argc, char **argv)
+{
+	return get_vmid_by_id(argc, argv);
+}
+
+int z_parse_info_vm_args(size_t argc, char **argv)
+{
+	return get_vmid_by_id(argc, argv);
+}
+
+int z_list_vms_info(uint16_t vmid)
+{
+	/* list all vms */
+	list_all_vms_info();
+	printk("|*****************************************************|\n");
+	return 0;
+}
+
+int vm_sysinfo_init(size_t argc, char **argv, struct z_vm *vm_ptr, struct z_os_info *vm_info)
+{
+	return z_parse_new_vm_args(argc, argv, vm_info, vm_ptr);
+}
diff --git a/subsys/zvm/vm_cpu.c b/subsys/zvm/vm_cpu.c
new file mode 100644
index 00000000000000..03663512592838
--- /dev/null
+++ b/subsys/zvm/vm_cpu.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include <../../kernel/include/timeout_q.h>
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+/**
+ * @brief Construct a new vcpu virt irq block, setting
+ * default descriptors.
+ * TODO: all the local irqs are inited here; they may better be
+ * inited when the vtimer is inited.
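+ * For now each local (SGI/PPI) descriptor gets an identity
+ * virq->pirq mapping and is bound to the default vcpu.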
+ */ +static void init_vcpu_virt_irq_desc(struct vcpu_virt_irq_block *virq_block) +{ + int i; + struct virt_irq_desc *desc; + for(i = 0; i < VM_LOCAL_VIRQ_NR; i++){ + desc = &virq_block->vcpu_virt_irq_desc[i]; + desc->id = VM_INVALID_DESC_ID; + desc->pirq_num = i; + desc->virq_num = i; + desc->prio = 0; + desc->vdev_trigger = 0; + desc->vcpu_id = DEFAULT_VCPU; + desc->virq_flags = 0; + desc->virq_states = 0; + desc->vm_id = DEFAULT_VM; + + sys_dnode_init(&(desc->desc_node)); + } +} + +static void save_vcpu_context(struct k_thread *thread) +{ + arch_vcpu_context_save(thread->vcpu_struct); +} + +static void load_vcpu_context(struct k_thread *thread) +{ + struct z_vcpu *vcpu = thread->vcpu_struct; + + arch_vcpu_context_load(thread->vcpu_struct); + + vcpu->resume_signal = false; +} + +static void vcpu_timer_event_pause(struct z_vcpu *vcpu) +{ + struct virt_timer_context *timer_ctxt = vcpu->arch->vtimer_context; + + z_abort_timeout(&timer_ctxt->vtimer_timeout); + z_abort_timeout(&timer_ctxt->ptimer_timeout); +} + +static void vcpu_context_switch(struct k_thread *new_thread, + struct k_thread *old_thread) +{ + struct z_vcpu *old_vcpu; + + if (VCPU_THREAD(old_thread)) { + old_vcpu = old_thread->vcpu_struct; + + save_vcpu_context(old_thread); + switch (old_vcpu->vcpu_state) { + case _VCPU_STATE_RUNNING: + old_vcpu->vcpu_state = _VCPU_STATE_READY; + break; + case _VCPU_STATE_RESET: + ZVM_LOG_WARN("Do not support vm reset! \n"); + break; + case _VCPU_STATE_PAUSED: + vcpu_timer_event_pause(old_vcpu); + vm_vdev_pause(old_vcpu); + break; + case _VCPU_STATE_HALTED: + vcpu_timer_event_pause(old_vcpu); + vm_vdev_pause(old_vcpu); + break; + default: + break; + } + } + + if (VCPU_THREAD(new_thread)) { + struct z_vcpu *new_vcpu = new_thread->vcpu_struct; + + if (new_vcpu->vcpu_state != _VCPU_STATE_READY) { + ZVM_LOG_ERR("vCPU is not ready, something may be wrong.\n"); + } + + load_vcpu_context(new_thread); + new_vcpu->vcpu_state = _VCPU_STATE_RUNNING; + } +} + +static void vcpu_state_to_ready(struct z_vcpu *vcpu) +{ + uint16_t cur_state = vcpu->vcpu_state; + struct k_thread *thread = vcpu->work->vcpu_thread; + + vcpu->hcpu_cycles = sys_clock_cycle_get_32(); + + switch (cur_state) + { + case _VCPU_STATE_UNKNOWN: + case _VCPU_STATE_READY: + k_thread_start(thread); + vcpu->vcpu_state = _VCPU_STATE_READY; + break; + case _VCPU_STATE_RUNNING: + vcpu->resume_signal = true; + break; + case _VCPU_STATE_RESET: + case _VCPU_STATE_PAUSED: + k_wakeup(thread); + break; + default: + ZVM_LOG_WARN("Invalid cpu state! \n"); + break; + } + +} + +static void vcpu_state_to_running(struct z_vcpu *vcpu) +{ + ARG_UNUSED(vcpu); + ZVM_LOG_WARN("No thing to do, running state may be auto switched. \n"); +} + +static void vcpu_state_to_reset(struct z_vcpu *vcpu) +{ + uint16_t cur_state = vcpu->vcpu_state; + struct k_thread *thread = vcpu->work->vcpu_thread; + + switch (cur_state) + { + case _VCPU_STATE_READY: + move_thread_to_end_of_prio_q(thread); +#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED) + arch_sched_broadcast_ipi(); +#endif + break; + case _VCPU_STATE_RESET: + break; + case _VCPU_STATE_RUNNING: + case _VCPU_STATE_PAUSED: + arch_vcpu_init(vcpu); + break; + default: + ZVM_LOG_WARN("Invalid cpu state here. 
\n");
+		break;
+	}
+	vcpu->resume_signal = false;
+}
+
+static void vcpu_state_to_paused(struct z_vcpu *vcpu)
+{
+	bool resumed = false;
+	uint16_t cur_state = vcpu->vcpu_state;
+	struct k_thread *thread = vcpu->work->vcpu_thread;
+
+	switch (cur_state) {
+	case _VCPU_STATE_READY:
+	case _VCPU_STATE_RUNNING:
+		resumed = vcpu->resume_signal;
+		vcpu->resume_signal = false;
+		if (resumed && vcpu->waitq_flag) {
+			vcpu_timer_event_pause(vcpu);
+		}
+		thread->base.thread_state |= _THREAD_SUSPENDED;
+		dequeue_ready_thread(thread);
+		break;
+	case _VCPU_STATE_RESET:
+	case _VCPU_STATE_PAUSED:
+	default:
+		ZVM_LOG_WARN("Invalid cpu state. \n");
+		break;
+	}
+}
+
+static void vcpu_state_to_halted(struct z_vcpu *vcpu)
+{
+	uint16_t cur_state = vcpu->vcpu_state;
+	struct k_thread *thread = vcpu->work->vcpu_thread;
+
+	switch (cur_state) {
+	case _VCPU_STATE_READY:
+	case _VCPU_STATE_RUNNING:
+	case _VCPU_STATE_PAUSED:
+		thread->base.thread_state |= _THREAD_VCPU_NO_SWITCH;
+		break;
+	case _VCPU_STATE_RESET:
+	case _VCPU_STATE_UNKNOWN:
+		vm_delete(vcpu->vm);
+		break;
+	default:
+		ZVM_LOG_WARN("Invalid cpu state here. \n");
+		break;
+	}
+	vcpu_ipi_scheduler(VCPU_IPI_MASK_ALL, 0);
+}
+
+static void vcpu_state_to_unknown(struct z_vcpu *vcpu)
+{
+	ARG_UNUSED(vcpu);
+}
+
+/**
+ * @brief Vcpu scheduler for switching a vcpu to a different state.
+ */
+int vcpu_state_switch(struct k_thread *thread, uint16_t new_state)
+{
+	int ret = 0;
+	struct z_vcpu *vcpu = thread->vcpu_struct;
+	uint16_t cur_state = vcpu->vcpu_state;
+
+	if (cur_state == new_state) {
+		return ret;
+	}
+	switch (new_state) {
+	case _VCPU_STATE_READY:
+		vcpu_state_to_ready(vcpu);
+		break;
+	case _VCPU_STATE_RUNNING:
+		vcpu_state_to_running(vcpu);
+		break;
+	case _VCPU_STATE_RESET:
+		vcpu_state_to_reset(vcpu);
+		break;
+	case _VCPU_STATE_PAUSED:
+		vcpu_state_to_paused(vcpu);
+		break;
+	case _VCPU_STATE_HALTED:
+		vcpu_state_to_halted(vcpu);
+		break;
+	case _VCPU_STATE_UNKNOWN:
+		vcpu_state_to_unknown(vcpu);
+		break;
+	default:
+		ZVM_LOG_ERR("Invalid state here. \n");
+		/* negative errno, consistent with the other error paths */
+		ret = -EINVAL;
+		break;
+	}
+	vcpu->vcpu_state = new_state;
+
+	return ret;
+}
+
+void do_vcpu_swap(struct k_thread *new_thread, struct k_thread *old_thread)
+{
+	if (new_thread == old_thread) {
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	vcpu_context_switch(new_thread, old_thread);
+#else
+	if (old_thread && VCPU_THREAD(old_thread)) {
+		save_vcpu_context(old_thread);
+	}
+	if (new_thread && VCPU_THREAD(new_thread)) {
+		load_vcpu_context(new_thread);
+	}
+#endif /* CONFIG_SMP */
+}
+
+void do_asm_vcpu_swap(struct k_thread *new_thread, struct k_thread *old_thread)
+{
+	if (!vcpu_need_switch(new_thread, old_thread)) {
+		return;
+	}
+	do_vcpu_swap(new_thread, old_thread);
+}
+
+int vcpu_ipi_scheduler(uint32_t cpu_mask, uint32_t timeout)
+{
+	ARG_UNUSED(timeout);
+	uint32_t mask = cpu_mask;
+
+	switch (mask) {
+	case VCPU_IPI_MASK_ALL:
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
+		arch_sched_broadcast_ipi();
+#else
+		ZVM_LOG_WARN("No SMP IPI support.");
+#endif /* CONFIG_SMP && CONFIG_SCHED_IPI_SUPPORTED */
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int vcpu_thread_entry(struct z_vcpu *vcpu)
+{
+	int ret = 0;
+
+	do {
+		ret = arch_vcpu_run(vcpu);
+
+		if (vcpu->vm->vm_status == VM_STATE_HALT) {
+			/* TODO: Disable all the allocated irqs;
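+			 * today only the vcpu timer is torn down below.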
+
+int vcpu_thread_entry(struct z_vcpu *vcpu)
+{
+	int ret = 0;
+
+	do {
+		ret = arch_vcpu_run(vcpu);
+
+		if (vcpu->vm->vm_status == VM_STATE_HALT) {
+			/* TODO: disable all the IRQs allocated to this VM. */
+			arch_vcpu_timer_deinit(vcpu);
+			break;
+		}
+	} while (ret >= 0);
+
+	k_sem_give(&vcpu->vm->vcpu_exit_sem[vcpu->vcpu_id]);
+
+	return ret;
+}
+
+struct z_vcpu *vm_vcpu_init(struct z_vm *vm, uint16_t vcpu_id, char *vcpu_name)
+{
+	uint16_t vm_prio;
+	int pcpu_num = 0;
+	struct z_vcpu *vcpu;
+	struct vcpu_work *vwork;
+	k_spinlock_key_t key;
+
+	vcpu = (struct z_vcpu *)k_malloc(sizeof(struct z_vcpu));
+	if (!vcpu) {
+		ZVM_LOG_ERR("Failed to allocate the vcpu struct");
+		return NULL;
+	}
+
+	vcpu->arch = (struct vcpu_arch *)k_malloc(sizeof(struct vcpu_arch));
+	if (!vcpu->arch) {
+		ZVM_LOG_ERR("Failed to allocate vcpu->arch");
+		k_free(vcpu);
+		return NULL;
+	}
+
+	/* Init the vcpu virt irq block. */
+	vcpu->virq_block.virq_pending_counts = 0;
+	vcpu->virq_block.vwfi.priv = NULL;
+	vcpu->virq_block.vwfi.state = false;
+	vcpu->virq_block.vwfi.yeild_count = 0;
+	ZVM_SPINLOCK_INIT(&vcpu->virq_block.vwfi.wfi_lock);
+	sys_dlist_init(&vcpu->virq_block.pending_irqs);
+	sys_dlist_init(&vcpu->virq_block.active_irqs);
+	ZVM_SPINLOCK_INIT(&vcpu->virq_block.spinlock);
+	init_vcpu_virt_irq_desc(&vcpu->virq_block);
+	ZVM_SPINLOCK_INIT(&vcpu->vcpu_lock);
+
+	if (vm->os->is_rtos) {
+		vm_prio = VCPU_RT_PRIO;
+	} else {
+		vm_prio = VCPU_NORT_PRIO;
+	}
+	vcpu->vm = vm;
+
+	/* vt_stack must be aligned, so allocate an aligned block. */
+	vwork = (struct vcpu_work *)k_aligned_alloc(0x10, sizeof(struct vcpu_work));
+	if (!vwork) {
+		ZVM_LOG_ERR("Failed to allocate the vcpu work struct!");
+		k_free(vcpu->arch);
+		k_free(vcpu);
+		return NULL;
+	}
+
+	/* Init the vcpu thread struct here. */
+	vwork->vcpu_thread = (struct k_thread *)k_malloc(sizeof(struct k_thread));
+	if (!vwork->vcpu_thread) {
+		ZVM_LOG_ERR("Failed to allocate the vcpu thread struct!");
+		k_free(vwork);
+		k_free(vcpu->arch);
+		k_free(vcpu);
+		return NULL;
+	}
+	/* TODO: at this stage the thread is marked as a kernel thread;
+	 * for system safety this will be revisited later.
+	 */
+	k_tid_t tid = k_thread_create(vwork->vcpu_thread, vwork->vt_stack,
+		VCPU_THREAD_STACKSIZE, (void *)vcpu_thread_entry, vcpu, NULL, NULL,
+		vm_prio, 0, K_FOREVER);
+	strcpy(tid->name, vcpu_name);
+
+	/* SMP support */
+#ifdef CONFIG_SCHED_CPU_MASK
+	/**
+	 * The default is 'new_thread->base.cpu_mask = 1', so BIT(0) must be
+	 * cleared before enabling another mask bit when
+	 * CONFIG_SCHED_CPU_MASK_PIN_ONLY=y.
+	 */
+	k_thread_cpu_mask_disable(tid, 0);
+
+	pcpu_num = get_static_idle_cpu();
+	if (pcpu_num < 0 || pcpu_num >= CONFIG_MP_NUM_CPUS) {
+		ZVM_LOG_WARN("No suitable idle cpu for this VM!\n");
+		k_thread_abort(tid);
+		k_free(vwork->vcpu_thread);
+		k_free(vwork);
+		k_free(vcpu->arch);
+		k_free(vcpu);
+		return NULL;
+	}
+
+	k_thread_cpu_mask_enable(tid, pcpu_num);
+	vcpu->cpu = pcpu_num;
+#else
+	vcpu->cpu = pcpu_num;
+#endif /* CONFIG_SCHED_CPU_MASK */
+
+	/* Store the new thread in the work struct. */
+	vwork->v_date = vcpu;
+	vwork->vcpu_thread->vcpu_struct = vcpu;
+
+	vcpu->work = vwork;
+	/* Init the vcpu timers and counters. */
+	vcpu->hcpu_cycles = 0;
+	vcpu->runnig_cycles = 0;
+	vcpu->paused_cycles = 0;
+	vcpu->vcpu_id = vcpu_id;
+	vcpu->vcpu_state = _VCPU_STATE_UNKNOWN;
+	vcpu->exit_type = 0;
+	vcpu->vcpuipi_count = 0;
+	vcpu->resume_signal = false;
+	vcpu->waitq_flag = false;
+
+	key = k_spin_lock(&vcpu->vm->vm_vcpu_id_count.vcpu_id_lock);
+	vcpu->vcpu_id = vcpu->vm->vm_vcpu_id_count.count++;
+	k_spin_unlock(&vcpu->vm->vm_vcpu_id_count.vcpu_id_lock, key);
+
+	if (arch_vcpu_init(vcpu)) {
+		k_thread_abort(tid);
+		k_free(vwork->vcpu_thread);
+		k_free(vwork);
+		k_free(vcpu->arch);
+		k_free(vcpu);
+		return NULL;
+	}
+
+	return vcpu;
+}
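+
+/*
+ * Editor's sketch (illustrative): the expected creation flow. "vm" is
+ * assumed to be a fully created struct z_vm; the vCPU starts running
+ * once it is switched to the READY state.
+ */
+static inline struct z_vcpu *vm_vcpu_create_sketch(struct z_vm *vm)
+{
+	struct z_vcpu *vcpu = vm_vcpu_init(vm, 0, "vm0_vcpu0");
+
+	if (vcpu == NULL) {
+		return NULL;
+	}
+	/* UNKNOWN -> READY starts the backing thread (vcpu_state_to_ready()). */
+	vcpu_state_switch(vcpu->work->vcpu_thread, _VCPU_STATE_READY);
+	return vcpu;
+}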
\n"); + return NULL; + } + + k_thread_cpu_mask_enable(tid, pcpu_num); + vcpu->cpu = pcpu_num; +#else + vcpu->cpu = pcpu_num; +#endif /* CONFIG_SCHED_CPU_MASK */ + + /* create a new thread and store it in work struct */ + vwork->v_date = vcpu; + vwork->vcpu_thread->vcpu_struct = vcpu; + + vcpu->work = vwork; + /* init vcpu timer*/ + vcpu->hcpu_cycles = 0; + vcpu->runnig_cycles = 0; + vcpu->paused_cycles = 0; + vcpu->vcpu_id = vcpu_id; + vcpu->vcpu_state = _VCPU_STATE_UNKNOWN; + vcpu->exit_type = 0; + vcpu->vcpuipi_count = 0; + vcpu->resume_signal = false; + vcpu->waitq_flag = false; + + key = k_spin_lock(&vcpu->vm->vm_vcpu_id_count.vcpu_id_lock); + vcpu->vcpu_id = vcpu->vm->vm_vcpu_id_count.count++; + k_spin_unlock(&vcpu->vm->vm_vcpu_id_count.vcpu_id_lock, key); + + if (arch_vcpu_init(vcpu)) { + k_free(vcpu); + return NULL; + } + + return vcpu; +} + +int vm_vcpu_deinit(struct z_vcpu *vcpu) +{ + int ret = 0; + + ret = arch_vcpu_deinit(vcpu); + if (ret) { + ZVM_LOG_WARN("Deinit arch vcpu error!"); + return ret; + } + + reset_idle_cpu(vcpu->cpu); + k_free(vcpu->work); + k_free(vcpu->arch); + k_free(vcpu); + + return ret; +} + +int vm_vcpu_ready(struct z_vcpu *vcpu) +{ + return vcpu_state_switch(vcpu->work->vcpu_thread, _VCPU_STATE_READY); +} + +int vm_vcpu_pause(struct z_vcpu *vcpu) +{ + return vcpu_state_switch(vcpu->work->vcpu_thread, _VCPU_STATE_PAUSED); +} + +int vm_vcpu_halt(struct z_vcpu *vcpu) +{ + return vcpu_state_switch(vcpu->work->vcpu_thread, _VCPU_STATE_HALTED); +} + +int vm_vcpu_reset(struct z_vcpu *vcpu) +{ + return vcpu_state_switch(vcpu->work->vcpu_thread, _VCPU_STATE_RESET); +} diff --git a/subsys/zvm/vm_device.c b/subsys/zvm/vm_device.c new file mode 100644 index 00000000000000..d031a9641b5ccd --- /dev/null +++ b/subsys/zvm/vm_device.c @@ -0,0 +1,386 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +#define DEV_CFG(dev) \ + ((const struct virt_device_config * const)(dev)->config) +#define DEV_DATA(dev) \ + ((struct virt_device_data *)(dev)->data) + +static struct device_chosen vm_device_chosen; + +int __weak vm_init_bdspecific_device(struct z_vm *vm) +{ + return 0; +} + +static int vm_vdev_mem_add(struct z_vm *vm, struct z_virt_dev *vdev) +{ + uint32_t attrs = 0; + + /*If device is emulated, set access off attrs*/ + if (vdev->dev_pt_flag && !vdev->shareable) { + attrs = MT_VM_DEVICE_MEM; + }else{ + attrs = MT_VM_DEVICE_MEM | MT_S2_ACCESS_OFF; + } + + return vm_vdev_mem_create(vm->vmem_domain, vdev->vm_vdev_paddr, + vdev->vm_vdev_vaddr, vdev->vm_vdev_size, attrs); + +} + +struct z_virt_dev *vm_virt_dev_add_no_memmap(struct z_vm *vm, const char *dev_name, bool pt_flag, + bool shareable, uint64_t dev_pbase, uint64_t dev_hva, + uint32_t dev_size, uint32_t dev_hirq, uint32_t dev_virq) +{ + uint16_t name_len; + struct z_virt_dev *vm_dev; + + vm_dev = (struct z_virt_dev *)k_malloc(sizeof(struct z_virt_dev)); + if (!vm_dev) { + return NULL; + } + + name_len = strlen(dev_name); + name_len = name_len > VIRT_DEV_NAME_LENGTH ? 
+
+struct z_virt_dev *vm_virt_dev_add(struct z_vm *vm, const char *dev_name, bool pt_flag,
+		bool shareable, uint64_t dev_pbase, uint64_t dev_vbase,
+		uint32_t dev_size, uint32_t dev_hirq, uint32_t dev_virq)
+{
+	uint16_t name_len;
+	int ret;
+	struct z_virt_dev *vm_dev;
+
+	vm_dev = (struct z_virt_dev *)k_malloc(sizeof(struct z_virt_dev));
+	if (!vm_dev) {
+		return NULL;
+	}
+
+	name_len = strlen(dev_name);
+	name_len = name_len > VIRT_DEV_NAME_LENGTH ? VIRT_DEV_NAME_LENGTH : name_len;
+	strncpy(vm_dev->name, dev_name, name_len);
+	vm_dev->name[name_len] = '\0';
+
+	vm_dev->dev_pt_flag = pt_flag;
+	vm_dev->shareable = shareable;
+	vm_dev->vm_vdev_paddr = dev_pbase;
+	vm_dev->vm_vdev_vaddr = dev_vbase;
+	vm_dev->vm_vdev_size = dev_size;
+
+	ret = vm_vdev_mem_add(vm, vm_dev);
+	if (ret) {
+		k_free(vm_dev);
+		return NULL;
+	}
+	vm_dev->virq = dev_virq;
+	vm_dev->hirq = dev_hirq;
+	vm_dev->vm = vm;
+
+	/* Init private data and vdev. */
+	vm_dev->priv_data = NULL;
+	vm_dev->priv_vdev = NULL;
+
+	sys_dnode_init(&vm_dev->vdev_node);
+	sys_dlist_append(&vm->vdev_list, &vm_dev->vdev_node);
+
+	return vm_dev;
+}
+
+int vm_virt_dev_remove(struct z_vm *vm, struct z_virt_dev *vm_dev)
+{
+	struct zvm_dev_lists *vdev_list;
+	struct z_virt_dev *chosen_dev = NULL;
+	struct _dnode *d_node, *ds_node;
+
+	sys_dlist_remove(&vm_dev->vdev_node);
+
+	vdev_list = get_zvm_dev_lists();
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vdev_list->dev_used_list, d_node, ds_node) {
+		chosen_dev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node);
+		if (chosen_dev->vm_vdev_paddr == vm_dev->vm_vdev_paddr) {
+			sys_dlist_remove(&chosen_dev->vdev_node);
+			sys_dlist_append(&vdev_list->dev_idle_list, &chosen_dev->vdev_node);
+			break;
+		}
+	}
+
+	k_free(vm_dev);
+	return 0;
+}
+
+int vdev_mmio_abort(arch_commom_regs_t *regs, int write, uint64_t addr,
+		uint64_t *value, uint16_t size)
+{
+	uint64_t *reg_value = value;
+	struct z_vm *vm;
+	struct z_virt_dev *vdev;
+	struct _dnode *d_node, *ds_node;
+	struct virtual_device_instance *vdevice_instance;
+
+	vm = get_current_vm();
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vm->vdev_list, d_node, ds_node) {
+		vdev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node);
+		vdevice_instance = (struct virtual_device_instance *)vdev->priv_data;
+		if (vdevice_instance == NULL) {
+			continue;
+		}
+		if (!(DEV_DATA(vdevice_instance)->vdevice_type & VM_DEVICE_PRE_KERNEL_1)) {
+			continue;
+		}
+		if ((addr >= vdev->vm_vdev_paddr) && (addr < vdev->vm_vdev_paddr + vdev->vm_vdev_size)) {
+			const struct virt_device_api *api = vdevice_instance->api;
+
+			if (write) {
+				return api->virt_device_write(vdev, addr, reg_value, size);
+			} else {
+				return api->virt_device_read(vdev, addr, reg_value, size);
+			}
+		}
+	}
+	/* No matching vdev was found. */
+	ZVM_LOG_WARN("No virtual device registered for this address: 0x%llx\n", addr);
+	return -ENODEV;
+}
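+
+/*
+ * Editor's note: vdev_mmio_abort() above is the landing point for trapped
+ * guest MMIO. A stage-2 data abort is decoded into (write, addr, value,
+ * size) and dispatched to the owning device's read/write hook. A minimal
+ * hook might look like this (illustrative; names are assumed):
+ *
+ *	static int my_vdev_read(struct z_virt_dev *vdev, uint64_t addr,
+ *				uint64_t *v, uint16_t size)
+ *	{
+ *		*v = 0;		// emulate a reads-as-zero register
+ *		return 0;
+ *	}
+ */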
+
+int vm_unmap_ptdev(struct z_virt_dev *vdev, uint64_t vm_dev_base,
+		uint64_t vm_dev_size, struct z_vm *vm)
+{
+	uint64_t p_base, v_base, p_size, v_size;
+
+	p_base = vdev->vm_vdev_paddr;
+	p_size = vdev->vm_vdev_size;
+	v_base = vm_dev_base;
+	v_size = vm_dev_size;
+
+	if (p_size != v_size || p_size == 0) {
+		ZVM_LOG_WARN("Device size mismatch; cannot allocate this device to the VM!");
+		return -ENODEV;
+	}
+
+	return arch_vm_dev_domain_unmap(p_size, v_base, v_size, vdev->name, vm->vmid,
+		&vm->vmem_domain->vm_mm_domain->arch.ptables);
+}
+
+int vm_vdev_pause(struct z_vcpu *vcpu)
+{
+	ARG_UNUSED(vcpu);
+	return 0;
+}
+
+int handle_vm_device_emulate(struct z_vm *vm, uint64_t pa_addr)
+{
+	int ret;
+	struct z_virt_dev *vm_dev, *chosen_dev = NULL;
+	struct zvm_dev_lists *vdev_list;
+	struct _dnode *d_node, *ds_node;
+	struct device *dev;
+	k_spinlock_key_t key;
+
+	key = k_spin_lock(&vm_device_chosen.lock);
+
+	vdev_list = get_zvm_dev_lists();
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vdev_list->dev_idle_list, d_node, ds_node) {
+		vm_dev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node);
+		/* Does the faulting address fall into this device's window? */
+		if (pa_addr < vm_dev->vm_vdev_vaddr ||
+		    pa_addr >= (vm_dev->vm_vdev_vaddr + vm_dev->vm_vdev_size)) {
+			continue;
+		}
+		vm_device_chosen.chosen_flag = true;
+
+		chosen_dev = vm_virt_dev_add(vm, vm_dev->name, vm_dev->dev_pt_flag, vm_dev->shareable,
+				vm_dev->vm_vdev_paddr, vm_dev->vm_vdev_vaddr,
+				vm_dev->vm_vdev_size, vm_dev->hirq, vm_dev->virq);
+		if (!chosen_dev) {
+			ZVM_LOG_WARN("No idle device %s available for the VM!", vm_dev->name);
+			vm_device_chosen.chosen_flag = false;
+			k_spin_unlock(&vm_device_chosen.lock, key);
+			return -ENODEV;
+		}
+		/* Move the device to the used list. */
+		sys_dlist_remove(&vm_dev->vdev_node);
+		sys_dlist_append(&vdev_list->dev_used_list, &vm_dev->vdev_node);
+		vm_device_irq_init(vm, chosen_dev);
+
+		dev = (struct device *)vm_dev->priv_data;
+		DEV_DATA(dev)->device_data = chosen_dev;
+
+		if (chosen_dev->shareable) {
+			chosen_dev->priv_data = dev;
+			ret = ((struct virt_device_api *)dev->api)->init_fn(dev, vm, chosen_dev);
+			if (ret) {
+				ZVM_LOG_WARN("Init of device %s failed!\n", dev->name);
+				k_spin_unlock(&vm_device_chosen.lock, key);
+				return -EFAULT;
+			}
+		}
+
+		ZVM_LOG_INFO("** Adding %s device to %s.\n", chosen_dev->name, vm->vm_name);
+		k_spin_unlock(&vm_device_chosen.lock, key);
+		return 0;
+	}
+	k_spin_unlock(&vm_device_chosen.lock, key);
+	return -ENODEV;
+}
\n", irq, priority); + return; + } + irq_enable(irq); +} + +struct z_virt_dev *allocate_device_to_vm(const struct device *dev, struct z_vm *vm, + struct z_virt_dev *vdev_desc, bool pt_flag, bool shareable) +{ + struct z_virt_dev *vdev; + + vdev = vm_virt_dev_add(vm, dev->name, pt_flag, shareable, DEV_CFG(dev)->reg_base, + vdev_desc->vm_vdev_paddr, DEV_CFG(dev)->reg_size, + DEV_CFG(dev)->hirq_num, vdev_desc->virq); + if(!vdev){ + return NULL; + } + + vm_device_irq_init(vm, vdev); + return vdev; +} + +void vm_device_callback_func(const struct device *dev, void *cb, + void *user_data) +{ + uint32_t virq; + ARG_UNUSED(cb); + int err = 0; + const struct z_virt_dev *vdev = (const struct z_virt_dev *)user_data; + + virq = vdev->virq; + if (virq == VM_DEVICE_INVALID_VIRQ) { + ZVM_LOG_WARN("Invalid interrupt occur! \n"); + return; + } + if (!vdev->vm) { + ZVM_LOG_WARN("VM struct not exit here!"); + return; + } + + err = set_virq_to_vm(vdev->vm, virq); + if (err < 0) { + ZVM_LOG_WARN("Send virq to vm error!"); + } + +} + +int vm_device_init(struct z_vm *vm) +{ + int ret = 0, i; + + sys_dlist_init(&vm->vdev_list); + + /* Assign ids to virtual devices. */ + for (i = 0; i < zvm_virtual_devices_count_get(); i++) { + const struct virtual_device_instance *virtual_device = zvm_virtual_device_get(i); + ZVM_LOG_INFO("Device name: %s. \n", virtual_device->name); + /*If the virtual device is nessenary for vm*/ + if(virtual_device->data->vdevice_type & VM_DEVICE_PRE_KERNEL_1){ + virtual_device->api->init_fn(NULL, vm, NULL); + ZVM_LOG_INFO("Init %s for VM successful. \n", virtual_device->name); + } + } + + /* TODO: scan the dtb and get the device's node. */ + /* Board specific device init, for example fig debugger. */ + switch (vm->os->info.os_type){ + case OS_TYPE_ZEPHYR: + break; + case OS_TYPE_LINUX: + ret = vm_init_bdspecific_device(vm); + break; + default: + break; + } + + return ret; +} + +int vm_device_deinit(struct z_vm *vm) +{ + int ret = 0; + struct _dnode *dev_list = &vm->vdev_list; + struct _dnode *d_node, *ds_node; + struct z_virt_dev *vdev; + const struct virtual_device_instance *vdevice_instance; + + SYS_DLIST_FOR_EACH_NODE_SAFE(dev_list, d_node, ds_node){ + vdev = CONTAINER_OF(d_node, struct z_virt_dev, vdev_node); + vdevice_instance = (const struct virtual_device_instance *)vdev->priv_data; + if(vdevice_instance != NULL) { + if(vdevice_instance->api->deinit_fn){ + ret = vdevice_instance->api->deinit_fn(NULL, vm, vdev); + ZVM_LOG_INFO("Remove virt_serial: %s.\n", vdev->name); + } + } + } + + return ret; +} diff --git a/subsys/zvm/vm_irq.c b/subsys/zvm/vm_irq.c new file mode 100644 index 00000000000000..0678513d51df7b --- /dev/null +++ b/subsys/zvm/vm_irq.c @@ -0,0 +1,169 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
diff --git a/subsys/zvm/vm_irq.c b/subsys/zvm/vm_irq.c
new file mode 100644
index 00000000000000..0678513d51df7b
--- /dev/null
+++ b/subsys/zvm/vm_irq.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+LOG_MODULE_DECLARE(ZVM_MODULE_NAME);
+
+#define VWFI_YIELD_THRESHOLD 100
+
+bool vcpu_irq_exist(struct z_vcpu *vcpu)
+{
+	bool pend_empty, active_empty;
+	struct vcpu_virt_irq_block *vb = &vcpu->virq_block;
+
+	pend_empty = sys_dlist_is_empty(&vb->pending_irqs);
+	active_empty = sys_dlist_is_empty(&vb->active_irqs);
+
+	/* An irq exists if either list is non-empty or a virq is pending. */
+	if (!(pend_empty && active_empty) || virt_irq_ispending(vcpu)) {
+		return true;
+	}
+
+	return false;
+}
+
+int vcpu_wait_for_irq(struct z_vcpu *vcpu)
+{
+	bool irq_exist, vcpu_will_yield = false, vcpu_will_pause = false;
+	k_spinlock_key_t key;
+
+	/* Check whether the vcpu has a pending or active irq. */
+	irq_exist = vcpu_irq_exist(vcpu);
+	key = k_spin_lock(&vcpu->virq_block.vwfi.wfi_lock);
+
+	if (irq_exist) {
+		vcpu->virq_block.vwfi.yeild_count = 0;
+		goto done;
+	} else if (vcpu->virq_block.vwfi.yeild_count < VWFI_YIELD_THRESHOLD) {
+		vcpu->virq_block.vwfi.yeild_count++;
+		vcpu_will_yield = true;
+		goto done;
+	}
+
+	if (!vcpu->virq_block.vwfi.state) {
+		vcpu_will_pause = true;
+		vcpu->virq_block.vwfi.state = true;
+		/* Start the wfi timeout. */
+	}
+
+done:
+	k_spin_unlock(&vcpu->virq_block.vwfi.wfi_lock, key);
+
+	if (vcpu_will_yield) {
+		/* TODO: yield this thread. */
+	}
+
+	if (vcpu_will_pause) {
+		irq_exist = vcpu_irq_exist(vcpu);
+		if (irq_exist) {
+			key = k_spin_lock(&vcpu->virq_block.vwfi.wfi_lock);
+			vcpu->virq_block.vwfi.yeild_count = 0;
+			vcpu->virq_block.vwfi.state = false;
+			/* End the wfi timeout. */
+			k_spin_unlock(&vcpu->virq_block.vwfi.wfi_lock, key);
+		}
+	}
+
+	return 0;
+}
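+
+/*
+ * Editor's note: vcpu_wait_for_irq() implements a two-stage WFI policy.
+ * For the first VWFI_YIELD_THRESHOLD idle exits the vCPU only yields
+ * (cheap, it stays runnable), and past the threshold it is parked until
+ * an interrupt shows up. The actual yield/park actions are still TODOs
+ * in this patch.
+ */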
+
+/**
+ * @brief Init call for creating the interrupt control block for a vm.
+ */
+static int vm_irq_ctrlblock_create(struct device *unused, struct z_vm *vm)
+{
+	ARG_UNUSED(unused);
+	struct vm_virt_irq_block *vvi_block = &vm->vm_irq_block;
+
+	if (VGIC_TYPER_LR_NUM != 0) {
+		vvi_block->flags = 0;
+		vvi_block->flags |= VIRQ_HW_SUPPORT;
+	} else {
+		ZVM_LOG_ERR("Init of gicv3 failed: the hardware does not support it.\n");
+		return -ENODEV;
+	}
+
+	vvi_block->enabled = false;
+	vvi_block->cpu_num = vm->vcpu_num;
+	vvi_block->irq_num = VM_GLOBAL_VIRQ_NR;
+	memset(vvi_block->ipi_vcpu_source, 0, sizeof(uint32_t) * CONFIG_MP_NUM_CPUS * VM_SGI_VIRQ_NR);
+	memset(vvi_block->irq_bitmap, 0, sizeof(vvi_block->irq_bitmap));
+
+	return 0;
+}
+
+/**
+ * @brief Init the virq descriptors for a vm. Device irqs that are shared
+ * by all cores (SPIs) are initialized in this routine.
+ */
+static int vm_virq_desc_init(struct z_vm *vm)
+{
+	int i;
+	struct virt_irq_desc *desc;
+
+	for (i = 0; i < VM_SPI_VIRQ_NR; i++) {
+		desc = &vm->vm_irq_block.vm_virt_irq_desc[i];
+
+		desc->virq_flags = 0;
+		/* A shared irq is shared by all cores. */
+		desc->vcpu_id = DEFAULT_VCPU;
+		desc->vm_id = vm->vmid;
+		desc->vdev_trigger = 0;
+		desc->virq_num = i;
+		desc->pirq_num = i;
+		desc->id = VM_INVALID_DESC_ID;
+		desc->virq_states = VIRQ_STATE_INVALID;
+		desc->type = 0;
+
+		sys_dnode_init(&(desc->desc_node));
+	}
+
+	return 0;
+}
+
+void vm_device_irq_init(struct z_vm *vm, struct z_virt_dev *vm_dev)
+{
+	bool *bit_map;
+	struct virt_irq_desc *desc;
+
+	desc = get_virt_irq_desc(vm->vcpus[DEFAULT_VCPU], vm_dev->virq);
+	if (vm_dev->dev_pt_flag) {
+		desc->virq_flags |= VIRQ_HW_FLAG;
+		ZVM_LOG_INFO("Add hardware interrupt support for the %s device!\n", vm_dev->name);
+	} else {
+		ZVM_LOG_INFO("Add software interrupt support for the %s device!\n", vm_dev->name);
+	}
+	desc->id = desc->virq_num;
+	desc->pirq_num = vm_dev->hirq;
+	desc->virq_num = vm_dev->virq;
+	/* Pass-through devices use the fast irq path. */
+	if (vm_dev->dev_pt_flag) {
+		bit_map = vm->vm_irq_block.irq_bitmap;
+		bit_map[vm_dev->hirq] = true;
+	}
+}
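+
+/*
+ * Editor's note (assumption based on the flag usage above): for a
+ * pass-through device, marking hirq in the VM's irq_bitmap lets the
+ * interrupt fast path inject the physical IRQ straight into the guest
+ * instead of going through the emulated-device lookup.
+ */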
\n"); + return ret; + } + ZVM_LOG_INFO("** Init VM devices successful! \n"); + + ret = vm_mem_init(new_vm); + if(ret < 0){ + return ret; + } + ZVM_LOG_INFO("** Init VM memory successful! \n"); + k_free(vm_info); + + ZVM_LOG_INFO("\n|*********************************************|\n"); + ZVM_LOG_INFO("|****** Create vm successful! **************| \n"); + ZVM_LOG_INFO("|****** VM INFO ******| \n"); + ZVM_LOG_INFO("|****** VM-NAME: %-12s ******| \n", new_vm->vm_name); + ZVM_LOG_INFO("|****** VM-ID: %-12d ******| \n", new_vm->vmid); + ZVM_LOG_INFO("|****** VCPU NUM: %-12d ******| \n", new_vm->vcpu_num); + switch (new_vm->os->info.os_type) { + case OS_TYPE_LINUX: + ZVM_LOG_INFO("|****** VMEM SIZE: %-12d(M) ******| \n", + LINUX_VM_MEMORY_SIZE/(1024*1024)); + break; + case OS_TYPE_ZEPHYR: + ZVM_LOG_INFO("|****** VMEM SIZE: %-12d(M) ******| \n", + ZEPHYR_VM_MEMORY_SIZE/(1024*1024)); + break; + default: + ZVM_LOG_INFO("|****** OTHER VM, NO MEMORY MSG ******| \n"); + } + ZVM_LOG_INFO("|*********************************************|\n"); + + return 0; +} + +int zvm_run_guest(size_t argc, char **argv) +{ + uint16_t vm_id; + int ret = 0; + struct z_vm *vm; + + ZVM_LOG_INFO("** Ready to run VM. \n"); + vm_id = z_parse_run_vm_args(argc, argv); + if (!(BIT(vm_id) & zvm_overall_info->alloced_vmid)) { + ZVM_LOG_WARN("This vmid is not exist!\n Please input zvm info to show info! \n"); + return -EINVAL; + } + + vm = zvm_overall_info->vms[vm_id]; + if (vm->vm_status & VM_STATE_RUNNING) { + ZVM_LOG_WARN("This vm is already running! \n Please input zvm info to check vms! \n"); + return -EINVAL; + } + + if (vm->vm_status & (VM_STATE_NEVER_RUN | VM_STATE_PAUSE)) { + if (vm->vm_status & VM_STATE_NEVER_RUN) { + load_os_image(vm); + } + vm_vcpus_ready(vm); + } else { + ZVM_LOG_WARN("The VM has a invalid status, abort! \n"); + return -ENODEV; + } + + ZVM_LOG_INFO("\n|*********************************************|\n"); + ZVM_LOG_INFO("|******\t Start vm successful! ***************| \n"); + ZVM_LOG_INFO("|******\t\t VM INFO \t \t******| \n"); + ZVM_LOG_INFO("|******\t VM-NAME: %s \t******| \n", vm->vm_name); + ZVM_LOG_INFO("|******\t VM-ID: \t %d \t\t******| \n", vm->vmid); + ZVM_LOG_INFO("|******\t VCPU NUM: \t %d \t\t******| \n", vm->vcpu_num); + ZVM_LOG_INFO("|*********************************************|\n"); + + return ret; +} + +int zvm_pause_guest(size_t argc, char **argv) +{ + uint16_t vm_id; + int ret = 0; + struct z_vm *vm; + k_spinlock_key_t key; + + key = k_spin_lock(&zvm_overall_info->spin_zmi); + + vm_id = z_parse_pause_vm_args(argc, argv); + if (!(BIT(vm_id) & zvm_overall_info->alloced_vmid)) { + ZVM_LOG_WARN("This vmid is not exist!\n Please input zvm info to show info! \n"); + k_spin_unlock(&zvm_overall_info->spin_zmi, key); + return -EINVAL; + } + + vm = zvm_overall_info->vms[vm_id]; + k_spin_unlock(&zvm_overall_info->spin_zmi, key); + if (vm->vm_status != VM_STATE_RUNNING) { + ZVM_LOG_WARN("This vm is not running!\n No need to pause it! 
\n"); + return -EPERM; + } + ret = vm_vcpus_pause(vm); + + return ret; +} + +int zvm_delete_guest(size_t argc, char **argv) +{ + uint16_t vm_id; + int i; + struct z_vm *vm; + + vm_id = z_parse_delete_vm_args(argc, argv); + if (!(BIT(vm_id) & zvm_overall_info->alloced_vmid)) { + ZVM_LOG_WARN("This vm is not exist!\n Please input zvm info to list vms!"); + return 0; + } + + vm = zvm_overall_info->vms[vm_id]; + switch (vm->vm_status) { + case VM_STATE_RUNNING: + ZVM_LOG_INFO("This vm is running!\n Try to stop and delete it!\n"); + vm_vcpus_halt(vm); + + for (i = 0; i < vm->vcpu_num; i++) { + k_sem_take(&vm->vcpu_exit_sem[i], K_FOREVER); + } + barrier_isync_fence_full(); + vm_delete(vm); + break; + case VM_STATE_PAUSE: + ZVM_LOG_INFO("This vm is paused!\n Just delete it!\n"); + vm_delete(vm); + break; + case VM_STATE_NEVER_RUN: + ZVM_LOG_INFO("This vm is created but not run!\n Just delete it!\n"); + vm_delete(vm); + break; + default: + ZVM_LOG_WARN("This vm status is invalid!\n"); + return -ENODEV; + } + + return 0; +} + +int zvm_info_guest(size_t argc, char **argv) +{ + int ret = 0; + + if (zvm_overall_info->vm_total_num > 0) { + ret = z_list_vms_info(0); + }else{ + ret = -ENODEV; + } + + return ret; +} + +/*TODO: add shell*/ +void zvm_shutdown_guest(struct z_vm *vm) +{ + ARG_UNUSED(vm); +} + +void zvm_reboot_guest(struct z_vm *vm) +{ + int ret; + ZVM_LOG_INFO("vm reboot.... \n"); + ret = vm_vcpus_pause(vm); + if(ret < 0) { + ZVM_LOG_WARN("VM reboot failed: pausing vm failed! \n"); + } + /* + * TODO: smp + */ + vm_vcpus_reset(vm); + vm->reboot = true; + vm_vcpus_ready(vm); +} diff --git a/subsys/zvm/vm_mm.c b/subsys/zvm/vm_mm.c new file mode 100644 index 00000000000000..4edce8f03fc6f2 --- /dev/null +++ b/subsys/zvm/vm_mm.c @@ -0,0 +1,521 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. + * Copyright 2024-2025 openEuler SIG-Zephyr + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_MODULE_DECLARE(ZVM_MODULE_NAME); + +struct k_spinlock vm_mem_domain_lock; +static uint8_t vm_max_partitions = CONFIG_MAX_DOMAIN_PARTITIONS; +static struct k_spinlock z_vm_domain_lock; + + +/** + * @brief add vpart_space to vm's unused list area. + */ +static int add_idle_vpart(struct vm_mem_domain *vmem_domain, + struct vm_mem_partition *vpart) +{ + + /* add vpart free list to mem_domain unused list */ + if (vpart->vpart_node.next == NULL) { + sys_dlist_append(&vmem_domain->idle_vpart_list, &vpart->vpart_node); + } + + return 0; +} + +/** + * @brief add vpart_space to vm's used list area. 
+
+/**
+ * @brief Add a vpart to the vm's mapped (used) list.
+ */
+int add_mapped_vpart(struct vm_mem_domain *vmem_domain,
+		struct vm_mem_partition *vpart)
+{
+	if (!vpart) {
+		return -1;
+	}
+
+	sys_dlist_append(&vmem_domain->mapped_vpart_list, &vpart->vpart_node);
+	return 0;
+}
+
+static struct vm_mem_partition *alloc_vm_mem_partition(uint64_t hpbase,
+		uint64_t ipbase, uint64_t size, uint32_t attrs)
+{
+	struct vm_mem_partition *vpart;
+
+	/* Allocate memory for the vpart. */
+	vpart = (struct vm_mem_partition *)k_malloc(sizeof(struct vm_mem_partition));
+	if (!vpart) {
+		return NULL;
+	}
+	vpart->vm_mm_partition = (struct k_mem_partition *)
+			k_malloc(sizeof(struct k_mem_partition));
+	if (!vpart->vm_mm_partition) {
+		k_free(vpart);
+		return NULL;
+	}
+	vpart->vm_mm_partition->start = ipbase;
+	vpart->vm_mm_partition->size = size;
+	vpart->vm_mm_partition->attr.attrs = attrs;
+
+	vpart->part_hpa_base = hpbase;
+	vpart->part_hpa_size = size;
+
+	sys_dnode_init(&vpart->vpart_node);
+	sys_dlist_init(&vpart->blk_list);
+
+	return vpart;
+}
+
+/**
+ * @brief Init a vpart from the default device tree.
+ */
+static int create_vm_mem_vpart(struct vm_mem_domain *vmem_domain, uint64_t hpbase,
+		uint64_t ipbase, uint64_t size, uint32_t attrs)
+{
+	int ret = 0;
+	struct vm_mem_partition *vpart;
+
+	vpart = alloc_vm_mem_partition(hpbase, ipbase, size, attrs);
+	if (vpart == NULL) {
+		return -ENXIO;
+	}
+	vpart->vmem_domain = vmem_domain;
+
+	ret = add_idle_vpart(vmem_domain, vpart);
+
+	return ret;
+}
+
+static int vm_ram_mem_create(struct vm_mem_domain *vmem_domain)
+{
+	int ret = 0;
+	uint64_t va_base, pa_base, size;
+	struct z_vm *vm = vmem_domain->vm;
+
+	va_base = vm->os->info.vm_mem_base;
+	size = vm->os->info.vm_mem_size;
+	pa_base = vm->os->info.vm_load_base;
+
+	ret = create_vm_mem_vpart(vmem_domain, pa_base, va_base, size, MT_VM_NORMAL_MEM);
+	return ret;
+}
+
+/**
+ * @brief Create the dtb memory partition.
+ */
+static int vm_dtb_mem_create(struct vm_mem_domain *vmem_domain)
+{
+	uint32_t vm_dtb_size = LINUX_DTB_MEM_SIZE;
+	uint64_t vm_dtb_base = LINUX_DTB_MEM_BASE;
+
+	/* Mapping the DTB with 'MT_VM_DEVICE_MEM' caused an address size
+	 * trap, so map it as normal memory instead.
+	 */
+	return create_vm_mem_vpart(vmem_domain, vm_dtb_base, vm_dtb_base,
+		vm_dtb_size, MT_VM_NORMAL_MEM);
+}
+
+static int vm_init_mem_create(struct vm_mem_domain *vmem_domain)
+{
+	int ret = 0;
+	struct z_vm *vm = vmem_domain->vm;
+
+	ret = vm_ram_mem_create(vmem_domain);
+	if (ret) {
+		return ret;
+	}
+
+#ifdef CONFIG_VM_DTB_FILE_INPUT
+	if (vm->os->info.os_type == OS_TYPE_LINUX) {
+		ret = vm_dtb_mem_create(vmem_domain);
+	}
+#endif /* CONFIG_VM_DTB_FILE_INPUT */
+
+	return ret;
+}
+
+static int vm_domain_init(struct k_mem_domain *domain, uint8_t num_parts,
+		struct k_mem_partition *parts[], struct z_vm *vm)
+{
+	k_spinlock_key_t key;
+	int ret = 0;
+	uint32_t vmid = vm->vmid;
+
+	if (domain == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!(num_parts == 0U || parts != NULL)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!(num_parts <= vm_max_partitions)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	key = k_spin_lock(&z_vm_domain_lock);
+	domain->num_partitions = 0U;
+	(void)memset(domain->partitions, 0, sizeof(domain->partitions));
+	sys_dlist_init(&domain->mem_domain_q);
+
+	ret = arch_vm_mem_domain_init(domain, vmid);
+	k_spin_unlock(&z_vm_domain_lock, key);
+
+out:
+	return ret;
+}
+
+static bool check_vm_add_partition(struct k_mem_domain *domain,
+		struct k_mem_partition *part)
+{
+	int i;
+	uintptr_t pstart, pend, dstart, dend;
+
+	if (part->size == 0U) {
+		return false;
+	}
+
+	pstart = part->start;
+	pend = part->start + part->size;
+
+	if (pend <= pstart) {
+		return false;
+	}
+
+	/* Check that this partition doesn't overlap any existing ones already
+	 * in the domain.
+	 */
+	for (i = 0; i < domain->num_partitions; i++) {
+		struct k_mem_partition *dpart = &domain->partitions[i];
+
+		if (dpart->size == 0U) {
+			/* Unused slot */
+			continue;
+		}
+
+		dstart = dpart->start;
+		dend = dstart + dpart->size;
+
+		if (pend > dstart && dend > pstart) {
+			ZVM_LOG_WARN("zvm partition %p base %lx (size %zu) overlaps existing base %lx (size %zu)\n",
+				part, part->start, part->size,
+				dpart->start, dpart->size);
+			return false;
+		}
+	}
+
+	return true;
+}
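+
+/*
+ * Editor's note: two half-open ranges [pstart, pend) and [dstart, dend)
+ * intersect iff pend > dstart && dend > pstart, which is exactly the
+ * test above. For example, [0x1000, 0x3000) vs [0x2000, 0x4000):
+ * 0x3000 > 0x2000 and 0x4000 > 0x1000, so the new partition is rejected.
+ */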
+
+static int vm_mem_domain_partition_add(struct vm_mem_domain *vmem_dm,
+		struct vm_mem_partition *vpart)
+{
+	int p_idx;
+	int ret = 0;
+	uintptr_t phys_start;
+	struct k_mem_domain *domain;
+	struct k_mem_partition *part;
+	struct z_vm *vm;
+	k_spinlock_key_t key;
+
+	phys_start = vpart->part_hpa_base;
+	domain = vmem_dm->vm_mm_domain;
+	part = vpart->vm_mm_partition;
+	vm = vmem_dm->vm;
+
+	if (!check_vm_add_partition(domain, part)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	key = k_spin_lock(&vm_mem_domain_lock);
+
+	for (p_idx = 0; p_idx < vm_max_partitions; p_idx++) {
+		/* A zero-sized partition denotes a free slot. */
+		if (domain->partitions[p_idx].size == 0U) {
+			break;
+		}
+	}
+
+	if (p_idx >= vm_max_partitions) {
+		ret = -ENOSPC;
+		goto unlock_out;
+	}
+	domain->partitions[p_idx].start = part->start;
+	domain->partitions[p_idx].size = part->size;
+	domain->partitions[p_idx].attr = part->attr;
+	domain->num_partitions++;
+
+#ifdef CONFIG_ARCH_MEM_DOMAIN_SYNCHRONOUS_API
+	ret = arch_vm_mem_domain_partition_add(domain, p_idx, phys_start, vm->vmid);
+#endif /* CONFIG_ARCH_MEM_DOMAIN_SYNCHRONOUS_API */
+
+unlock_out:
+	k_spin_unlock(&vm_mem_domain_lock, key);
+
+out:
+	return ret;
+}
+
+static int vm_mem_domain_partition_remove(struct vm_mem_domain *vmem_dm)
+{
+	int p_idx;
+	int ret = 0;
+	struct k_mem_domain *domain;
+	struct z_vm *vm;
+	k_spinlock_key_t key;
+
+	domain = vmem_dm->vm_mm_domain;
+	vm = vmem_dm->vm;
+	key = k_spin_lock(&vm_mem_domain_lock);
+
+#ifdef CONFIG_ARCH_MEM_DOMAIN_SYNCHRONOUS_API
+	for (p_idx = 0; p_idx < vm_max_partitions; p_idx++) {
+		if (domain->partitions[p_idx].size != 0U) {
+			ret = arch_vm_mem_domain_partition_remove(domain, p_idx, vm->vmid);
+		}
+	}
+#endif
+	k_free(domain);
+	k_spin_unlock(&vm_mem_domain_lock, key);
+
+	return ret;
+}
+
+int vm_mem_domain_partitions_add(struct vm_mem_domain *vmem_dm)
+{
+	int ret = 0;
+	k_spinlock_key_t key;
+	struct _dnode *d_node, *ds_node;
+	struct vm_mem_partition *vpart;
+
+	key = k_spin_lock(&vmem_dm->spin_mmlock);
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vmem_dm->idle_vpart_list, d_node, ds_node) {
+		vpart = CONTAINER_OF(d_node, struct vm_mem_partition, vpart_node);
+		ret = vm_mem_domain_partition_add(vmem_dm, vpart);
+		if (ret) {
+			ZVM_LOG_ERR("vpart memory map failed, vpart.base 0x%llx, vpart.size 0x%llx.",
+				vpart->part_hpa_base, vpart->part_hpa_size);
+			k_spin_unlock(&vmem_dm->spin_mmlock, key);
+			return ret;
+		}
+
+		sys_dlist_remove(&vpart->vpart_node);
+		sys_dlist_append(&vmem_dm->mapped_vpart_list, &vpart->vpart_node);
+	}
+
+	k_spin_unlock(&vmem_dm->spin_mmlock, key);
+	return ret;
+}
+
+int vm_mem_apart_remove(struct vm_mem_domain *vmem_dm)
+{
+	int ret = 0;
+	k_spinlock_key_t key;
+	struct _dnode *d_node, *ds_node;
+	struct vm_mem_partition *vpart;
+	struct k_mem_partition *vmpart;
+	struct k_mem_domain *vm_mem_dm;
+	struct z_vm *vm;
+
+	vm = vmem_dm->vm;
+	key = k_spin_lock(&vmem_dm->spin_mmlock);
+
+	vm_mem_dm = vmem_dm->vm_mm_domain;
+	ret = vm_mem_domain_partition_remove(vmem_dm);
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vmem_dm->mapped_vpart_list, d_node, ds_node) {
+		vpart = CONTAINER_OF(d_node, struct vm_mem_partition, vpart_node);
+		vmpart = vpart->vm_mm_partition;
+		sys_dlist_remove(&vpart->vpart_node);
+		k_free(vmpart);
+		k_free(vpart);
+	}
+
+	k_spin_unlock(&vmem_dm->spin_mmlock, key);
+	return ret;
+}
+
+int vm_dynmem_apart_add(struct vm_mem_domain *vmem_dm)
+{
+	int ret = 0;
+	uint64_t vm_mem_blk_size;
+	k_spinlock_key_t key;
+	struct _dnode *d_node, *ds_node;
+	struct vm_mem_partition *vpart;
+	struct z_vm *vm = vmem_dm->vm;
+
+	switch (vm->os->info.os_type) {
+	case OS_TYPE_LINUX:
+		vm_mem_blk_size = LINUX_VM_BLOCK_SIZE;
+		break;
+	case OS_TYPE_ZEPHYR:
+		vm_mem_blk_size = ZEPHYR_VM_BLOCK_SIZE;
+		break;
+	default:
+		vm_mem_blk_size = DEFAULT_VM_BLOCK_SIZE;
+		ZVM_LOG_WARN("Unknown OS type!\n");
+		break;
+	}
+
+	key = k_spin_lock(&vmem_dm->spin_mmlock);
+
+	SYS_DLIST_FOR_EACH_NODE_SAFE(&vmem_dm->idle_vpart_list, d_node, ds_node) {
+		/* TODO: map each block before moving it to the mapped list. */
+		vpart = CONTAINER_OF(d_node, struct vm_mem_partition, vpart_node);
+
+		sys_dlist_remove(&vpart->vpart_node);
+		sys_dlist_append(&vmem_dm->mapped_vpart_list, &vpart->vpart_node);
+	}
+	k_spin_unlock(&vmem_dm->spin_mmlock, key);
+
+	return ret;
+}
for vm mm struct failed! \n"); + return -ENXIO; + } + vmem_dm->vm_mm_domain = (struct k_mem_domain *)k_malloc(sizeof(struct k_mem_domain)); + if (!vmem_dm->vm_mm_domain) { + ZVM_LOG_WARN("Allocate mm memory domain failed! \n"); + return -ENXIO; + } + vmem_dm->is_init = false; + ZVM_SPINLOCK_INIT(&vmem_dm->spin_mmlock); + /* init the list of used and unused vpart */ + sys_dlist_init(&vmem_dm->idle_vpart_list); + sys_dlist_init(&vmem_dm->mapped_vpart_list); + ret = vm_domain_init(vmem_dm->vm_mm_domain, 0, NULL, vm); + if (ret) { + ZVM_LOG_WARN("Init vm domain failed! \n"); + return -ENXIO; + } + + vmem_dm->vm = vm; + vm->vmem_domain = vmem_dm; + + key = k_spin_lock(&vmem_dm->spin_mmlock); + ret = vm_init_mem_create(vmem_dm); + if (ret) { + ZVM_LOG_WARN("Init vm areas failed! \n"); + k_spin_unlock(&vmem_dm->spin_mmlock, key); + return ret; + } + k_spin_unlock(&vmem_dm->spin_mmlock, key); + + return 0; +} + +int vm_vdev_mem_create(struct vm_mem_domain *vmem_domain, uint64_t hpbase, + uint64_t ipbase, uint64_t size, uint32_t attrs) +{ + return create_vm_mem_vpart(vmem_domain, hpbase, ipbase, size, attrs); +} + +uint64_t vm_gpa_to_hpa(struct z_vm *vm, uint64_t gpa, struct vm_mem_partition *vpart) +{ + struct vm_mem_domain *vmem_domain = vm->vmem_domain; + sys_dnode_t *d_node, *ds_node; + uint64_t vpart_gpa_start, vpart_gpa_end, vpart_hpa_start; + + SYS_DLIST_FOR_EACH_NODE_SAFE(&vmem_domain->mapped_vpart_list, d_node, ds_node) { + vpart = CONTAINER_OF(d_node, struct vm_mem_partition, vpart_node); + + vpart_gpa_start = (uint64_t)(vpart->vm_mm_partition->start); + vpart_gpa_end = vpart_gpa_start + ((uint64_t)vpart->vm_mm_partition->size); + + if(vpart_gpa_start <= gpa && gpa <= vpart_gpa_end) { + vpart_hpa_start = vpart->part_hpa_base; + return (gpa - vpart_gpa_start + vpart_hpa_start); + } + } + return -ESRCH; +} + +void vm_host_memory_read(uint64_t hpa, void *dst, size_t len) +{ + size_t len_actual = len; + uint64_t *hva; + if (len == 1) { + len = 4; + } + k_mem_map_phys_bare((uint8_t **)&hva, hpa, len, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + memcpy(dst, hva, len_actual); + k_mem_unmap_phys_bare((uint8_t *)hva, len); +} + +void vm_host_memory_write(uint64_t hpa, void *src, size_t len) +{ + size_t len_actual = len; + uint64_t *hva; + if (len == 1) { + len = 4; + } + k_mem_map_phys_bare((uint8_t **)&hva, (uintptr_t)hpa, len, K_MEM_CACHE_NONE | K_MEM_PERM_RW); + memcpy(hva, src, len_actual); + k_mem_unmap_phys_bare((uint8_t *)hva, len); +} + +void vm_guest_memory_read(struct z_vm *vm, uint64_t gpa, void *dst, size_t len) +{ + uint64_t hpa; + struct vm_mem_partition *vpart; + vpart = (struct vm_mem_partition *)k_malloc(sizeof(struct vm_mem_partition)); + if (!vpart) { + return; + } + hpa = vm_gpa_to_hpa(vm, gpa, vpart); + if(hpa < 0){ + printk("vm_guest_memory_read: gpa to hpa failed!\n"); + return ; + } + vm_host_memory_read(hpa, dst, len); +} + +void vm_guest_memory_write(struct z_vm *vm, uint64_t gpa, void *src, size_t len) +{ + uint64_t hpa; + struct vm_mem_partition *vpart; + vpart = (struct vm_mem_partition *)k_malloc(sizeof(struct vm_mem_partition)); + if (!vpart) { + return; + } + hpa = vm_gpa_to_hpa(vm, gpa, vpart); + if(hpa < 0){ + printk("vm_guest_memory_write: gpa to hpa failed!\n"); + return ; + } + vm_host_memory_write(hpa, src, len); +} diff --git a/subsys/zvm/zvm.c b/subsys/zvm/zvm.c new file mode 100644 index 00000000000000..ef2dd18154deae --- /dev/null +++ b/subsys/zvm/zvm.c @@ -0,0 +1,259 @@ +/* + * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc. 
+
+void vm_host_memory_read(uint64_t hpa, void *dst, size_t len)
+{
+	size_t len_actual = len;
+	uint64_t *hva;
+
+	/* Map at least 4 bytes; the copy still uses the caller's length. */
+	if (len == 1) {
+		len = 4;
+	}
+	k_mem_map_phys_bare((uint8_t **)&hva, hpa, len, K_MEM_CACHE_NONE | K_MEM_PERM_RW);
+	memcpy(dst, hva, len_actual);
+	k_mem_unmap_phys_bare((uint8_t *)hva, len);
+}
+
+void vm_host_memory_write(uint64_t hpa, void *src, size_t len)
+{
+	size_t len_actual = len;
+	uint64_t *hva;
+
+	/* Map at least 4 bytes; the copy still uses the caller's length. */
+	if (len == 1) {
+		len = 4;
+	}
+	k_mem_map_phys_bare((uint8_t **)&hva, (uintptr_t)hpa, len, K_MEM_CACHE_NONE | K_MEM_PERM_RW);
+	memcpy(hva, src, len_actual);
+	k_mem_unmap_phys_bare((uint8_t *)hva, len);
+}
+
+void vm_guest_memory_read(struct z_vm *vm, uint64_t gpa, void *dst, size_t len)
+{
+	uint64_t hpa;
+
+	hpa = vm_gpa_to_hpa(vm, gpa, NULL);
+	if (hpa == (uint64_t)-ESRCH) {
+		ZVM_LOG_WARN("vm_guest_memory_read: gpa to hpa failed!\n");
+		return;
+	}
+	vm_host_memory_read(hpa, dst, len);
+}
+
+void vm_guest_memory_write(struct z_vm *vm, uint64_t gpa, void *src, size_t len)
+{
+	uint64_t hpa;
+
+	hpa = vm_gpa_to_hpa(vm, gpa, NULL);
+	if (hpa == (uint64_t)-ESRCH) {
+		ZVM_LOG_WARN("vm_guest_memory_write: gpa to hpa failed!\n");
+		return;
+	}
+	vm_host_memory_write(hpa, src, len);
+}
diff --git a/subsys/zvm/zvm.c b/subsys/zvm/zvm.c
new file mode 100644
index 00000000000000..ef2dd18154deae
--- /dev/null
+++ b/subsys/zvm/zvm.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright 2024-2025 HNU-ESNL: Guoqi Xie, Chenglai Xiong, Xingyu Hu and etc.
+ * Copyright 2024-2025 openEuler SIG-Zephyr
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+LOG_MODULE_REGISTER(ZVM_MODULE_NAME);
+
+struct zvm_manage_info *zvm_overall_info; /* TODO: this may be replaced by a macro later. */
+static struct zvm_dev_lists zvm_overall_dev_lists;
+
+/**
+ * @brief zvm_hwsys_info_init initializes the zvm_info for the hypervisor.
+ * Two stages for this function:
+ * 1. Init zvm_overall for some of the struct's parameters.
+ * 2. Get hardware information from the dts.
+ * TODO: add the hardware here.
+ */
+static int zvm_hwsys_info_init(struct zvm_hwsys_info *z_info)
+{
+	int cpu_ret = -1, mem_ret = -1;
+
+	ARG_UNUSED(cpu_ret);
+	ARG_UNUSED(mem_ret);
+
+	z_info->phy_mem_used = 0;
+
+	return 0;
+}
+
+void zvm_ipi_handler(void)
+{
+	struct z_vcpu *vcpu = _current_vcpu;
+	k_spinlock_key_t key;
+
+	/* Only act if the current thread is a vcpu thread. */
+	if (vcpu) {
+		if (vcpu->vcpuipi_count) {
+			/* The ipi was sent to this vcpu. */
+			vm_ipi_handler(vcpu->vm);
+			key = k_spin_lock(&vcpu->vcpu_lock);
+			vcpu->vcpuipi_count--;
+			k_spin_unlock(&vcpu->vcpu_lock, key);
+		}
+	}
+}
+
+int load_os_image(struct z_vm *vm)
+{
+	int ret = 0;
+
+	switch (vm->os->info.os_type) {
+	case OS_TYPE_LINUX:
+	case OS_TYPE_ZEPHYR:
+		load_vm_image(vm->vmem_domain, vm->os);
+		break;
+	default:
+		ZVM_LOG_WARN("Unsupported OS image!");
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+/**
+ * @brief This function inits the zvm devices at the zvm init stage.
+ * TODO: may be extended later.
+ * @return int
+ */
+static int zvm_dev_ops_init(void)
+{
+	return 0;
+}
+
+/**
+ * @brief Init the zvm overall device info here.
+ * Two stages for this function:
+ * 1. Create and init the zvm_overall struct.
+ * 2. Pass information on.
+ * @return int : the error code
+ */
+static int zvm_overall_init(void)
+{
+	int ret = 0;
+
+	/* First initialize zvm_overall_info->hw_info. */
+	zvm_overall_info = (struct zvm_manage_info *)k_malloc(sizeof(struct zvm_manage_info));
+	if (!zvm_overall_info) {
+		return -ENOMEM;
+	}
+
+	zvm_overall_info->hw_info = (struct zvm_hwsys_info *)
+			k_malloc(sizeof(struct zvm_hwsys_info));
+	if (!zvm_overall_info->hw_info) {
+		ZVM_LOG_ERR("Allocating memory for zvm_overall_info failed.\n");
+		/*
+		 * This resource-release scheme is cumbersome; a resource
+		 * stack could manage these resources instead.
+		 */
+		k_free(zvm_overall_info);
+		return -ENOMEM;
+	}
+
+	ret = zvm_hwsys_info_init(zvm_overall_info->hw_info);
+	if (ret) {
+		k_free(zvm_overall_info->hw_info);
+		k_free(zvm_overall_info);
+		return ret;
+	}
+
+	memset(zvm_overall_info->vms, 0, sizeof(zvm_overall_info->vms));
+	zvm_overall_info->alloced_vmid = 0;
+	zvm_overall_info->vm_total_num = 0;
+	ZVM_SPINLOCK_INIT(&zvm_overall_info->spin_zmi);
+
+	return ret;
+}
+
+/**
+ * @brief Add every eligible device to the zvm overall idle list
+ * (pass-through handling is still a TODO).
+ */
+static int zvm_init_idle_device_1(const struct device *dev, struct z_virt_dev *vdev,
+		struct zvm_dev_lists *dev_list)
+{
+	uint16_t name_len;
+	struct z_virt_dev *vm_dev = vdev;
+
+	/* TODO: determine whether to pass the device through based on its type. */
+	vm_dev->dev_pt_flag = true;
+
+	if (strcmp(((struct virt_device_config *)dev->config)->device_type, "virtio") == 0) {
+		vm_dev->shareable = true;
+	} else {
+		vm_dev->shareable = false;
+	}
+
+	name_len = strlen(dev->name);
+	name_len = name_len > VIRT_DEV_NAME_LENGTH ? VIRT_DEV_NAME_LENGTH : name_len;
+	strncpy(vm_dev->name, dev->name, name_len);
+	vm_dev->name[name_len] = '\0';
+
+	vm_dev->vm_vdev_paddr = ((struct virt_device_config *)dev->config)->reg_base;
+	vm_dev->vm_vdev_size = ((struct virt_device_config *)dev->config)->reg_size;
+	vm_dev->hirq = ((struct virt_device_config *)dev->config)->hirq_num;
+
+	if (!strncmp(VM_DEFAULT_CONSOLE_NAME, vm_dev->name, VM_DEFAULT_CONSOLE_NAME_LEN)) {
+		vm_dev->vm_vdev_vaddr = VM_DEBUG_CONSOLE_BASE;
+		vm_dev->virq = VM_DEBUG_CONSOLE_IRQ;
+	} else {
+		vm_dev->vm_vdev_vaddr = vm_dev->vm_vdev_paddr;
+		vm_dev->virq = vm_dev->hirq;
+	}
+
+	vm_dev->vm = NULL;
+	vm_dev->priv_data = (void *)dev;
+
+	ZVM_LOG_INFO("Init idle device %s successful! \n", vm_dev->name);
+	ZVM_LOG_INFO("The device's paddress is 0x%llx, vaddress is 0x%llx, size is 0x%x, hirq is %d, virq is %d. \n",
+		vm_dev->vm_vdev_paddr, vm_dev->vm_vdev_vaddr, vm_dev->vm_vdev_size, vm_dev->hirq, vm_dev->virq);
+
+	sys_dnode_init(&vm_dev->vdev_node);
+	sys_dlist_append(&dev_list->dev_idle_list, &vm_dev->vdev_node);
+
+	return 0;
+}
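+
+/*
+ * Editor's note: device discovery below relies on a sentinel. ZVM-aware
+ * drivers report VM_DEVICE_INIT_RES from their init function, so scanning
+ * dev->state->init_res over the device section finds every device that
+ * can be handed to a VM, and each hit is recorded via
+ * zvm_init_idle_device_1() above.
+ */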
+
+/**
+ * @brief Scan the device list and collect the assignable devices.
+ */
+static int zvm_devices_list_init(void)
+{
+	struct z_virt_dev *vm_dev;
+
+	sys_dlist_init(&zvm_overall_dev_lists.dev_idle_list);
+	sys_dlist_init(&zvm_overall_dev_lists.dev_used_list);
+
+	/* Scan the host dts and build the device list. */
+	STRUCT_SECTION_FOREACH(device, dev) {
+		/**
+		 * Use `init_res` to judge whether the device is ready to be
+		 * allocated to a vm.
+		 */
+		if (dev->state->init_res == VM_DEVICE_INIT_RES) {
+			vm_dev = (struct z_virt_dev *)k_malloc(sizeof(struct z_virt_dev));
+			if (vm_dev == NULL) {
+				return -ENOMEM;
+			}
+			zvm_init_idle_device_1(dev, vm_dev, &zvm_overall_dev_lists);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Get the zvm dev lists object.
+ * @return struct zvm_dev_lists*
+ */
+struct zvm_dev_lists *get_zvm_dev_lists(void)
+{
+	return &zvm_overall_dev_lists;
+}
+
+/*
+ * @brief The main work of this function is to initialize the zvm module.
+ *
+ * All of the work includes:
+ * 1. Check the hardware support for the hypervisor;
+ * 2. Initialize the struct variable "zvm_overall_info";
+ * 3. TODO: init the zvm devices and operation functions.
+ */
+static int zvm_init(void)
+{
+	int ret = 0;
+	void *op = NULL;
+
+	ret = zvm_arch_init(op);
+	if (ret) {
+		ZVM_LOG_ERR("zvm_arch_init failed! \n");
+		return ret;
+	}
+
+	ret = zvm_overall_init();
+	if (ret) {
+		ZVM_LOG_ERR("Init of the zvm_overall struct failed; ZVM init failed! \n");
+		return ret;
+	}
+
+	ret = zvm_devices_list_init();
+	if (ret) {
+		ZVM_LOG_ERR("Init of the zvm_dev_list struct failed; ZVM init failed! \n");
+		return ret;
+	}
+
+	/* TODO: ready to init zvm_dev and its ops. */
+	zvm_dev_ops_init();
+
+	return ret;
+}
+
+/* To allow device mmap, the init level is set to APPLICATION. */
+SYS_INIT(zvm_init, APPLICATION, CONFIG_ZVM_INIT_PRIORITY);
\n" \ + "You can use 'zvm new -t zephyr' or 'linux' to create a new vm. \n" +#define SHELL_HELP_RUN_VM "Run a created vm. \n" \ + "You can use 'zvm run -n 0' to run vm with vmid equal to 0. \n" +#define SHELL_HELP_LIST_VM "List all vm info. \n" \ + "You can use 'zvm info' to list all vm info. \n" \ + "You can use 'zvm info -n 0' to list vm info with vmid equal to 0. \n" +#define SHELL_HELP_PAUSE_VM "Pause a vm. \n" \ + "You can use 'zvm pause -n 0' to pause vm with vmid equal to 0. \n" +#define SHELL_HELP_DELETE_VM "Delete a vm. \n" \ + "You can use 'zvm delete -n 0' to delete vm with vmid equal to 0. \n" +#define SHELL_HELP_UPDATE_VM "Update vm. \n" \ + "vm update is not supported now. \n" +#define SHELL_HELP_CONNECT_VIRTUAL_SERIAL "Switch virtual serial. \n" \ + "You can use 'zvm look 0' to connect available virtual serial. \n" + +static struct k_spinlock shell_vmops_lock; + +static int cmd_zvm_new(const struct shell *zvm_shell, size_t argc, char **argv) +{ + int ret = 0; + k_spinlock_key_t key; + + key = k_spin_lock(&shell_vmops_lock); + shell_fprintf(zvm_shell, SHELL_NORMAL, "Ready to create a new vm... \n"); + + ret = zvm_new_guest(argc, argv); + if (ret) { + shell_fprintf(zvm_shell, SHELL_NORMAL, + "Create vm failured, please follow the message and try again! \n"); + k_spin_unlock(&shell_vmops_lock, key); + return ret; + } + k_spin_unlock(&shell_vmops_lock, key); + + return ret; +} + + +static int cmd_zvm_run(const struct shell *zvm_shell, size_t argc, char **argv) +{ + /* Run vm code. */ + int ret = 0; + k_spinlock_key_t key; + + key = k_spin_lock(&shell_vmops_lock); + + ret = zvm_run_guest(argc, argv); + if (ret) { + shell_fprintf(zvm_shell, SHELL_NORMAL, + "Start vm failured, please follow the message and try again! \n"); + k_spin_unlock(&shell_vmops_lock, key); + return ret; + } + + k_spin_unlock(&shell_vmops_lock, key); + + return ret; +} + + +static int cmd_zvm_pause(const struct shell *zvm_shell, size_t argc, char **argv) +{ + int ret = 0; + k_spinlock_key_t key; + + key = k_spin_lock(&shell_vmops_lock); + ret = zvm_pause_guest(argc, argv); + if (ret) { + shell_fprintf(zvm_shell, SHELL_NORMAL, + "Pause vm failured, please follow the message and try again! \n"); + k_spin_unlock(&shell_vmops_lock, key); + return ret; + } + + k_spin_unlock(&shell_vmops_lock, key); + + return ret; +} + + +static int cmd_zvm_delete(const struct shell *zvm_shell, size_t argc, char **argv) +{ + int ret = 0; + k_spinlock_key_t key; + + key = k_spin_lock(&shell_vmops_lock); + + /* Delete vm code. */ + ret = zvm_delete_guest(argc, argv); + if (ret) { + shell_fprintf(zvm_shell, SHELL_NORMAL, + "Delete vm failured, please follow the message and try again! \n"); + k_spin_unlock(&shell_vmops_lock, key); + return ret; + } + k_spin_unlock(&shell_vmops_lock, key); + + return ret; +} + + +static int cmd_zvm_info(const struct shell *zvm_shell, size_t argc, char **argv) +{ + int ret = 0; + k_spinlock_key_t key; + + key = k_spin_lock(&shell_vmops_lock); + + /* Delete vm code. */ + ret = zvm_info_guest(argc, argv); + if (ret) { + shell_fprintf(zvm_shell, SHELL_NORMAL, + "List vm failured. \n There may no vm in the system! \n"); + k_spin_unlock(&shell_vmops_lock, key); + return ret; + } + k_spin_unlock(&shell_vmops_lock, key); + + return 0; +} + + +static int cmd_zvm_update(const struct shell *zvm_shell, size_t argc, char **argv) +{ + /* Update vm code. */ + ARG_UNUSED(argc); + ARG_UNUSED(argv); + + shell_fprintf(zvm_shell, SHELL_NORMAL, + "Update vm is not support now, Please try other command. 
\n"); + return 0; +} + +/* Add subcommand for Root0 command zvm. */ +SHELL_STATIC_SUBCMD_SET_CREATE(m_sub_zvm, + SHELL_CMD(new, NULL, SHELL_HELP_CREATE_NEW_VM, cmd_zvm_new), + SHELL_CMD(run, NULL, SHELL_HELP_RUN_VM, cmd_zvm_run), + SHELL_CMD(pause, NULL, SHELL_HELP_PAUSE_VM, cmd_zvm_pause), + SHELL_CMD(delete, NULL, SHELL_HELP_DELETE_VM, cmd_zvm_delete), + SHELL_CMD(info, NULL, SHELL_HELP_LIST_VM, cmd_zvm_info) , + SHELL_CMD(update, NULL, SHELL_HELP_UPDATE_VM, cmd_zvm_update), + SHELL_CMD(look, NULL, SHELL_HELP_CONNECT_VIRTUAL_SERIAL, switch_virtual_serial_handler), + SHELL_SUBCMD_SET_END +); + +/* Add command for hypervisor. */ +SHELL_CMD_REGISTER(zvm, &m_sub_zvm, SHELL_HELP_ZVM, NULL); \ No newline at end of file