Re: [PATCH] hw/intc: riscv-imsic: Fix interrupt state updates.

From: Alistair Francis
Subject: Re: [PATCH] hw/intc: riscv-imsic: Fix interrupt state updates.
Date: Mon, 9 Sep 2024 15:05:10 +1000
On Sat, Sep 7, 2024 at 6:24 AM Tomasz Jeznach <tjeznach@rivosinc.com> wrote:
>
> The IMSIC state variable eistate[] is modified by CSR instructions
> within a range dedicated to the local CPU and by MMIO writes from any CPU.
> Access to eistate from MMIO accessors is protected by the BQL, but
> read-modify-write (RMW) sequences from CSRRW do not acquire the BQL,
> making the RMW sequence vulnerable to a race condition with MMIO access
> from a remote CPU.
>
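For context, the failure mode is a classic lost update: the CSR path's
plain "|=" compiles to a separate load and store, so a concurrent MMIO
write landing in between is silently overwritten. A minimal standalone
sketch, using C11 atomics in place of QEMU's qatomic_*() helpers (the
function names and bit value below are illustrative, not taken from the
patch):

    #include <stdatomic.h>
    #include <stdint.h>

    #define IMSIC_EISTATE_PENDING  (1U << 0)   /* illustrative value */

    /*
     * Racy read-modify-write, as in the pre-patch CSR path: a remote
     * CPU's OR that lands between the load and the store is lost.
     */
    static void set_pending_racy(uint32_t *slot)
    {
        *slot |= IMSIC_EISTATE_PENDING;        /* load, OR, store */
    }

    /*
     * Atomic read-modify-write, as after the patch: the whole update is
     * one indivisible operation, so concurrent writers cannot be lost.
     */
    static void set_pending_atomic(_Atomic uint32_t *slot)
    {
        atomic_fetch_or(slot, IMSIC_EISTATE_PENDING);
    }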
> This race can manifest as missed IPIs or MSIs in multi-CPU systems, e.g.:
>
> [   43.008092] watchdog: BUG: soft lockup - CPU#2 stuck for 27s! [kworker/u19:1:52]
> [   43.011723] CPU: 2 UID: 0 PID: 52 Comm: kworker/u19:1 Not tainted 6.11.0-rc6
> [ 43.013070] Workqueue: events_unbound deferred_probe_work_func
> [ 43.018776] [<ffffffff800b4a86>] smp_call_function_many_cond+0x190/0x5c2
> [ 43.019205] [<ffffffff800b4f28>] on_each_cpu_cond_mask+0x20/0x32
> [ 43.019447] [<ffffffff8001069a>] __flush_tlb_range+0xf2/0x190
> [ 43.019683] [<ffffffff80010914>] flush_tlb_kernel_range+0x20/0x28
>
> The interrupt line raise/lower sequence has been changed to prevent a
> race between evaluating the eistate and executing the qemu_irq
> raise/lower, ensuring that the interrupt line is not incorrectly
> deactivated based on a stale topei check result. To avoid holding the
> BQL, all modifications of eistate are converted to atomic operations.
>
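To make the ordering hazard concrete, here is the removed raise/lower
sequence, reconstructed from the riscv_imsic_update() hunk below with
the race window annotated (a sketch for illustration, not code from the
patch):

    static void riscv_imsic_update_old(RISCVIMSICState *imsic, uint32_t page)
    {
        if (imsic->eidelivery[page] && riscv_imsic_topei(imsic, page)) {
            qemu_irq_raise(imsic->external_irqs[page]);
        } else {
            /*
             * Race window: a remote CPU's MMIO write may set a pending
             * bit after the topei check above has already evaluated to
             * zero; lowering here then leaves a pending-and-enabled
             * interrupt behind with the IRQ line deasserted.
             */
            qemu_irq_lower(imsic->external_irqs[page]);
        }
    }

With the patched lower-then-re-evaluate-then-raise order, a racing
writer can at worst cause a spurious extra pulse, never a missed
interrupt.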
> Signed-off-by: Tomasz Jeznach <tjeznach@rivosinc.com>

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>

Alistair
> ---
> hw/intc/riscv_imsic.c | 50 +++++++++++++++++++++++++++----------------
> 1 file changed, 32 insertions(+), 18 deletions(-)
>
> diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
> index b90f0d731d..9ef65d4012 100644
> --- a/hw/intc/riscv_imsic.c
> +++ b/hw/intc/riscv_imsic.c
> @@ -55,7 +55,7 @@ static uint32_t riscv_imsic_topei(RISCVIMSICState *imsic, uint32_t page)
>                  (imsic->eithreshold[page] <= imsic->num_irqs)) ?
>                  imsic->eithreshold[page] : imsic->num_irqs;
>      for (i = 1; i < max_irq; i++) {
> -        if ((imsic->eistate[base + i] & IMSIC_EISTATE_ENPEND) ==
> +        if ((qatomic_read(&imsic->eistate[base + i]) & IMSIC_EISTATE_ENPEND) ==
>                  IMSIC_EISTATE_ENPEND) {
>              return (i << IMSIC_TOPEI_IID_SHIFT) | i;
>          }
> @@ -66,10 +66,24 @@ static uint32_t riscv_imsic_topei(RISCVIMSICState *imsic, uint32_t page)
>
>  static void riscv_imsic_update(RISCVIMSICState *imsic, uint32_t page)
>  {
> +    uint32_t base = page * imsic->num_irqs;
> +
> +    /*
> +     * Lower the interrupt line if necessary, then evaluate the current
> +     * IMSIC state.
> +     * This sequence ensures that any race between evaluating the eistate and
> +     * updating the interrupt line will not result in an incorrectly
> +     * deactivated connected CPU IRQ line.
> +     * If multiple interrupts are pending, this sequence functions identically
> +     * to qemu_irq_pulse.
> +     */
> +
> +    if (qatomic_fetch_and(&imsic->eistate[base], ~IMSIC_EISTATE_ENPEND)) {
> +        qemu_irq_lower(imsic->external_irqs[page]);
> +    }
>      if (imsic->eidelivery[page] && riscv_imsic_topei(imsic, page)) {
>          qemu_irq_raise(imsic->external_irqs[page]);
> -    } else {
> -        qemu_irq_lower(imsic->external_irqs[page]);
> +        qatomic_or(&imsic->eistate[base], IMSIC_EISTATE_ENPEND);
>      }
>  }
>
> @@ -125,12 +139,11 @@ static int riscv_imsic_topei_rmw(RISCVIMSICState *imsic, uint32_t page,
>          topei >>= IMSIC_TOPEI_IID_SHIFT;
>          base = page * imsic->num_irqs;
>          if (topei) {
> -            imsic->eistate[base + topei] &= ~IMSIC_EISTATE_PENDING;
> +            qatomic_and(&imsic->eistate[base + topei], ~IMSIC_EISTATE_PENDING);
>          }
> -
> -        riscv_imsic_update(imsic, page);
>      }
>
> +    riscv_imsic_update(imsic, page);
>      return 0;
>  }
>
> @@ -139,7 +152,7 @@ static int riscv_imsic_eix_rmw(RISCVIMSICState *imsic,
>                                 uint32_t num, bool pend, target_ulong *val,
>                                 target_ulong new_val, target_ulong wr_mask)
>  {
> -    uint32_t i, base;
> +    uint32_t i, base, prev;
>      target_ulong mask;
>      uint32_t state = (pend) ? IMSIC_EISTATE_PENDING : IMSIC_EISTATE_ENABLED;
>
> @@ -157,10 +170,6 @@ static int riscv_imsic_eix_rmw(RISCVIMSICState *imsic,
>
>      if (val) {
>          *val = 0;
> -        for (i = 0; i < xlen; i++) {
> -            mask = (target_ulong)1 << i;
> -            *val |= (imsic->eistate[base + i] & state) ? mask : 0;
> -        }
>      }
>
>      for (i = 0; i < xlen; i++) {
> @@ -172,10 +181,15 @@ static int riscv_imsic_eix_rmw(RISCVIMSICState *imsic,
>          mask = (target_ulong)1 << i;
>          if (wr_mask & mask) {
>              if (new_val & mask) {
> -                imsic->eistate[base + i] |= state;
> +                prev = qatomic_fetch_or(&imsic->eistate[base + i], state);
>              } else {
> -                imsic->eistate[base + i] &= ~state;
> +                prev = qatomic_fetch_and(&imsic->eistate[base + i], ~state);
>              }
> +        } else {
> +            prev = qatomic_read(&imsic->eistate[base + i]);
> +        }
> +        if (val && (prev & state)) {
> +            *val |= mask;
>          }
>      }
>
> @@ -302,14 +316,14 @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value,
>      page = addr >> IMSIC_MMIO_PAGE_SHIFT;
>      if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) {
>          if (value && (value < imsic->num_irqs)) {
> -            imsic->eistate[(page * imsic->num_irqs) + value] |=
> -                IMSIC_EISTATE_PENDING;
> +            qatomic_or(&imsic->eistate[(page * imsic->num_irqs) + value],
> +                       IMSIC_EISTATE_PENDING);
> +
> +            /* Update CPU external interrupt status */
> +            riscv_imsic_update(imsic, page);
>          }
>      }
>
> -    /* Update CPU external interrupt status */
> -    riscv_imsic_update(imsic, page);
> -
>      return;
>
>  err:
>
> base-commit: fd1952d814da738ed107e05583b3e02ac11e88ff
> --
> 2.34.1