From: Nicholas Piggin
Subject: Re: [PATCH 06/14] ppc/xive2: Process group backlog when updating the CPPR
Date: Tue, 19 Nov 2024 14:34:00 +1000

On Wed Oct 16, 2024 at 7:13 AM AEST, Michael Kowal wrote:
> From: Frederic Barrat <fbarrat@linux.ibm.com>
>
> When the hypervisor or OS pushes a new value to the CPPR, if the LSMFB
> value is lower than the new CPPR value, there could be a pending group
> interrupt in the backlog, so it needs to be scanned.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> Signed-off-by: Michael Kowal <kowal@linux.ibm.com>
> ---
>  include/hw/ppc/xive2.h |   4 +
>  hw/intc/xive.c         |   4 +-
>  hw/intc/xive2.c        | 173 ++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 177 insertions(+), 4 deletions(-)
>
> diff --git a/include/hw/ppc/xive2.h b/include/hw/ppc/xive2.h
> index d88db05687..e61b978f37 100644
> --- a/include/hw/ppc/xive2.h
> +++ b/include/hw/ppc/xive2.h
> @@ -115,6 +115,10 @@ typedef struct Xive2EndSource {
>   * XIVE2 Thread Interrupt Management Area (POWER10)
>   */
>  
> +void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
> +                          hwaddr offset, uint64_t value, unsigned size);
> +void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
> +                          hwaddr offset, uint64_t value, unsigned size);
>  void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
>                             uint64_t value, unsigned size);
>  uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 8ffcac4f65..2aa6e1fecc 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -603,7 +603,7 @@ static const XiveTmOp xive2_tm_operations[] = {
>       * MMIOs below 2K : raw values and special operations without side
>       * effects
>       */
> -    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
> +    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive2_tm_set_os_cppr,
>                                                       NULL },
>      { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive2_tm_push_os_ctx,
>                                                       NULL },
> @@ -611,7 +611,7 @@ static const XiveTmOp xive2_tm_operations[] = {
>                                                       NULL },
>      { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS,        1, xive_tm_set_os_lgs,
>                                                       NULL },
> -    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
> +    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive2_tm_set_hv_cppr,
>                                                       NULL },
>      { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
>                                                       NULL },
> diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
> index 7130892482..0c53f71879 100644
> --- a/hw/intc/xive2.c
> +++ b/hw/intc/xive2.c
> @@ -18,6 +18,7 @@
>  #include "hw/ppc/xive.h"
>  #include "hw/ppc/xive2.h"
>  #include "hw/ppc/xive2_regs.h"
> +#include "trace.h"
>  
>  uint32_t xive2_router_get_config(Xive2Router *xrtr)
>  {
> @@ -764,6 +765,172 @@ void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
>      }
>  }
>  
> +static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring,
> +                                      uint32_t *nvp_blk, uint32_t *nvp_idx)
> +{
> +    uint32_t w2, cam;
> +
> +    w2 = xive_tctx_word2(&tctx->regs[ring]);
> +    switch (ring) {
> +    case TM_QW1_OS:
> +        if (!(be32_to_cpu(w2) & TM2_QW1W2_VO)) {
> +            return -1;
> +        }
> +        cam = xive_get_field32(TM2_QW1W2_OS_CAM, w2);
> +        break;
> +    case TM_QW2_HV_POOL:
> +        if (!(be32_to_cpu(w2) & TM2_QW2W2_VP)) {
> +            return -1;
> +        }
> +        cam = xive_get_field32(TM2_QW2W2_POOL_CAM, w2);
> +        break;
> +    case TM_QW3_HV_PHYS:
> +        if (!(be32_to_cpu(w2) & TM2_QW3W2_VT)) {
> +            return -1;
> +        }
> +        cam = xive2_tctx_hw_cam_line(tctx->xptr, tctx);
> +        break;
> +    default:
> +        return -1;
> +    }
> +    *nvp_blk = xive2_nvp_blk(cam);
> +    *nvp_idx = xive2_nvp_idx(cam);
> +    return 0;
> +}
> +
> +static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)

Some of the xive1 code kind of has placeholders for group code or routes
group stuff through to xive2 code, so I wonder if this duplication is
really necessary or whether it could just be added to xive1?

I kind of hoped we could unify xive1 and 2 more, but maybe it's too late
without a lot more work, and all new development is going to go into
xive2...

Okay for now I guess, we could look at unification one day maybe.

> +{
> +    uint8_t *regs = &tctx->regs[ring];
> +    Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr);
> +    uint8_t old_cppr, backlog_prio, first_group, group_level = 0;
> +    uint8_t pipr_min, lsmfb_min, ring_min;
> +    bool group_enabled;
> +    uint32_t nvp_blk, nvp_idx;
> +    Xive2Nvp nvp;
> +    int rc;
> +
> +    trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
> +                             regs[TM_IPB], regs[TM_PIPR],
> +                             cppr, regs[TM_NSR]);
> +
> +    if (cppr > XIVE_PRIORITY_MAX) {
> +        cppr = 0xff;
> +    }
> +
> +    old_cppr = regs[TM_CPPR];
> +    regs[TM_CPPR] = cppr;

If CPPR remains the same, we can return early.

If CPPR is being increased, this scanning is not required (a
redistribution of the group interrupt is required if it became
precluded, as noted in the FIXME below, but no scanning should be
needed, so that FIXME should be moved up here).

If there is an interrupt already presented and CPPR is being
lowered, nothing needs to be done either (because the presented
interrupt should already be the most favoured).
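
Roughly what I have in mind, reusing this function's locals (untested
sketch only; the NSR check would need to look at the right ring's
registers):

  old_cppr = regs[TM_CPPR];
  if (cppr == old_cppr) {
      return;
  }
  regs[TM_CPPR] = cppr;
  if (cppr > old_cppr) {
      /*
       * CPPR is being raised: no backlog scan needed, only the
       * precluded-group re-trigger from the FIXME below would
       * apply here.
       */
      return;
  }
  if (regs[TM_NSR]) {
      /*
       * CPPR is being lowered but an interrupt is already
       * presented; it should already be the most favoured one,
       * so there is nothing more to do.
       */
      return;
  }
  /* CPPR lowered with nothing presented: fall through to the scan */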

> +
> +    /*
> +     * Recompute the PIPR based on local pending interrupts. It will
> +     * be adjusted below if needed in case of pending group interrupts.
> +     */
> +    pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
> +    group_enabled = !!regs[TM_LGS];
> +    lsmfb_min = (group_enabled) ? regs[TM_LSMFB] : 0xff;
> +    ring_min = ring;
> +
> +    /* PHYS updates also depend on POOL values */
> +    if (ring == TM_QW3_HV_PHYS) {
> +        uint8_t *pregs = &tctx->regs[TM_QW2_HV_POOL];
> +
> +        /* POOL values only matter if POOL ctx is valid */
> +        if (pregs[TM_WORD2] & 0x80) {
> +
> +            uint8_t pool_pipr = xive_ipb_to_pipr(pregs[TM_IPB]);
> +            uint8_t pool_lsmfb = pregs[TM_LSMFB];
> +
> +            /*
> +             * Determine highest priority interrupt and
> +             * remember which ring has it.
> +             */
> +            if (pool_pipr < pipr_min) {
> +                pipr_min = pool_pipr;
> +                if (pool_pipr < lsmfb_min) {
> +                    ring_min = TM_QW2_HV_POOL;
> +                }
> +            }
> +
> +            /* Values needed for group priority calculation */
> +            if (pregs[TM_LGS] && (pool_lsmfb < lsmfb_min)) {
> +                group_enabled = true;
> +                lsmfb_min = pool_lsmfb;
> +                if (lsmfb_min < pipr_min) {
> +                    ring_min = TM_QW2_HV_POOL;
> +                }
> +            }
> +        }
> +    }
> +    regs[TM_PIPR] = pipr_min;
> +
> +    rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx);
> +    if (rc) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid context\n");
> +        return;
> +    }
> +
> +    if (cppr < old_cppr) {
> +        /*
> +         * FIXME: check if there's a group interrupt being presented
> +         * and if the new cppr prevents it. If so, then the group
> +         * interrupt needs to be re-added to the backlog and
> +         * re-triggered (see re-trigger END info in the NVGC
> +         * structure)
> +         */
> +    }
> +
> +    if (group_enabled &&
> +        lsmfb_min < cppr &&
> +        lsmfb_min < regs[TM_PIPR]) {
> +        /*
> +         * Thread has seen a group interrupt with a higher priority
> +         * than the new cppr or pending local interrupt. Check the
> +         * backlog
> +         */
> +        if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n",
> +                          nvp_blk, nvp_idx);
> +            return;
> +        }
> +
> +        if (!xive2_nvp_is_valid(&nvp)) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n",
> +                          nvp_blk, nvp_idx);
> +            return;
> +        }
> +
> +        first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0);
> +        if (!first_group) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n",
> +                          nvp_blk, nvp_idx);
> +            return;
> +        }
> +
> +        backlog_prio = xive2_presenter_backlog_check(tctx->xptr,
> +                                                     nvp_blk, nvp_idx,
> +                                                     first_group, &group_level);
> +        tctx->regs[ring_min + TM_LSMFB] = backlog_prio;

LSMFB may not be the same as lsmfb_min, so you can't present
unconditionally.

I think after updating, it should test

  if (lsmfb_min != backlog_prio) {
      goto scan_again;
  }

Where scan_again: goes back to recomputing min priorities and scanning.
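
Something like this maybe (rough sketch only, using the names from
this function):

  scan_again:
      /*
       * Recompute pipr_min, lsmfb_min and ring_min from the local
       * and pool rings exactly as at the top of this function,
       * then redo the NVP lookup and the backlog check.
       */
      backlog_prio = xive2_presenter_backlog_check(tctx->xptr,
                                                   nvp_blk, nvp_idx,
                                                   first_group, &group_level);
      tctx->regs[ring_min + TM_LSMFB] = backlog_prio;
      if (lsmfb_min != backlog_prio) {
          goto scan_again;
      }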

Thanks,
Nick

> +        if (backlog_prio != 0xFF) {
> +            xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx,
> +                                         backlog_prio, group_level);
> +            regs[TM_PIPR] = backlog_prio;
> +        }
> +    }
> +    /* CPPR has changed, check if we need to raise a pending exception */
> +    xive_tctx_notify(tctx, ring_min, group_level);
> +}
> +
> +void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
> +                          hwaddr offset, uint64_t value, unsigned size)
> +{
> +    xive2_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
> +}
> +
> +void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
> +                          hwaddr offset, uint64_t value, unsigned size)
> +{
> +    xive2_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
> +}
> +
>  static void xive2_tctx_set_target(XiveTCTX *tctx, uint8_t ring, uint8_t target)
>  {
>      uint8_t *regs = &tctx->regs[ring];
> @@ -934,7 +1101,9 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
>  
>  bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority)
>  {
> -    uint8_t *regs = &tctx->regs[ring];
> +    /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
> +    uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
> +    uint8_t *alt_regs = &tctx->regs[alt_ring];
>  
>      /*
>       * The xive2_presenter_tctx_match() above tells if there's a match
> @@ -942,7 +1111,7 @@ bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority)
>       * priority to know if the thread can take the interrupt now or if
>       * it is precluded.
>       */
> -    if (priority < regs[TM_CPPR]) {
> +    if (priority < alt_regs[TM_CPPR]) {
>          return false;
>      }
>      return true;

These last two hunks are logically a separate patch for enabling group
support for POOL?



