qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 02/14] ppc/xive2: Add grouping level to notification


From: Nicholas Piggin
Subject: Re: [PATCH 02/14] ppc/xive2: Add grouping level to notification
Date: Tue, 19 Nov 2024 12:08:19 +1000

On Wed Oct 16, 2024 at 7:13 AM AEST, Michael Kowal wrote:
> From: Frederic Barrat <fbarrat@linux.ibm.com>
>
> The NSR has a (so far unused) grouping level field. When a interrupt
> is presented, that field tells the hypervisor or OS if the interrupt
> is for an individual VP or for a VP-group/crowd. This patch reworks
> the presentation API to allow to set/unset the level when
> raising/accepting an interrupt.
>
> It also renames xive_tctx_ipb_update() to xive_tctx_pipr_update() as
> the IPB is only used for VP-specific target, whereas the PIPR always
> needs to be updated.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> Signed-off-by: Michael Kowal <kowal@linux.ibm.com>
> ---
>  include/hw/ppc/xive.h      | 19 +++++++-
>  include/hw/ppc/xive_regs.h | 20 +++++++--
>  hw/intc/xive.c             | 90 +++++++++++++++++++++++---------------
>  hw/intc/xive2.c            | 18 ++++----
>  hw/intc/trace-events       |  2 +-
>  5 files changed, 100 insertions(+), 49 deletions(-)
>
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 31242f0406..27ef6c1a17 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -510,6 +510,21 @@ static inline uint8_t xive_priority_to_ipb(uint8_t 
> priority)
>          0 : 1 << (XIVE_PRIORITY_MAX - priority);
>  }
>  
> +static inline uint8_t xive_priority_to_pipr(uint8_t priority)
> +{
> +    return priority > XIVE_PRIORITY_MAX ? 0xFF : priority;
> +}
> +
> +/*
> + * Convert an Interrupt Pending Buffer (IPB) register to a Pending
> + * Interrupt Priority Register (PIPR), which contains the priority of
> + * the most favored pending notification.
> + */
> +static inline uint8_t xive_ipb_to_pipr(uint8_t ibp)
> +{
> +    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
> +}
> +
>  /*
>   * XIVE Thread Interrupt Management Aera (TIMA)
>   *
> @@ -532,8 +547,10 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, GString 
> *buf);
>  Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp);
>  void xive_tctx_reset(XiveTCTX *tctx);
>  void xive_tctx_destroy(XiveTCTX *tctx);
> -void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb);
> +void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
> +                           uint8_t group_level);
>  void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring);
> +void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level);
>  
>  /*
>   * KVM XIVE device helpers
> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> index 326327fc79..b455728c9c 100644
> --- a/include/hw/ppc/xive_regs.h
> +++ b/include/hw/ppc/xive_regs.h
> @@ -146,7 +146,14 @@
>  #define TM_SPC_PULL_PHYS_CTX_OL 0xc38   /* Pull phys ctx to odd cache line   
>  */
>  /* XXX more... */
>  
> -/* NSR fields for the various QW ack types */
> +/*
> + * NSR fields for the various QW ack types
> + *
> + * P10 has an extra bit in QW3 for the group level instead of the
> + * reserved 'i' bit. Since it is not used and we don't support group
> + * interrupts on P9, we use the P10 definition for the group level so
> + * that we can have common macros for the NSR
> + */
>  #define TM_QW0_NSR_EB           PPC_BIT8(0)
>  #define TM_QW1_NSR_EO           PPC_BIT8(0)
>  #define TM_QW3_NSR_HE           PPC_BITMASK8(0, 1)
> @@ -154,8 +161,15 @@
>  #define  TM_QW3_NSR_HE_POOL     1
>  #define  TM_QW3_NSR_HE_PHYS     2
>  #define  TM_QW3_NSR_HE_LSI      3
> -#define TM_QW3_NSR_I            PPC_BIT8(2)
> -#define TM_QW3_NSR_GRP_LVL      PPC_BIT8(3, 7)
> +#define TM_NSR_GRP_LVL          PPC_BITMASK8(2, 7)
> +/*
> + * On P10, the format of the 6-bit group level is: 2 bits for the
> + * crowd size and 4 bits for the group size. Since group/crowd size is
> + * always a power of 2, we encode the log. For example, group_level=4
> + * means crowd size = 0 and group size = 16 (2^4)
> + * Same encoding is used in the NVP and NVGC structures for
> + * PGoFirst and PGoNext fields
> + */
>  
>  /*
>   * EAS (Event Assignment Structure)
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index efcb63e8aa..bacf518fa6 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -27,16 +27,6 @@
>   * XIVE Thread Interrupt Management context
>   */
>  
> -/*
> - * Convert an Interrupt Pending Buffer (IPB) register to a Pending
> - * Interrupt Priority Register (PIPR), which contains the priority of
> - * the most favored pending notification.
> - */
> -static uint8_t ipb_to_pipr(uint8_t ibp)
> -{
> -    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
> -}
> -
>  static uint8_t exception_mask(uint8_t ring)
>  {
>      switch (ring) {
> @@ -87,10 +77,17 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t 
> ring)
>  
>          regs[TM_CPPR] = cppr;
>  
> -        /* Reset the pending buffer bit */
> -        alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
> +        /*
> +         * If the interrupt was for a specific VP, reset the pending
> +         * buffer bit, otherwise clear the logical server indicator
> +         */
> +        if (regs[TM_NSR] & TM_NSR_GRP_LVL) {
> +            regs[TM_NSR] &= ~TM_NSR_GRP_LVL;
> +        } else {
> +            alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
> +        }
>  
> -        /* Drop Exception bit */
> +        /* Drop the exception bit */
>          regs[TM_NSR] &= ~mask;

NSR can just be set to 0 directly instead of clearing masks.

>  
>          trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
> @@ -101,7 +98,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t 
> ring)
>      return ((uint64_t)nsr << 8) | regs[TM_CPPR];
>  }
>  
> -static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
> +void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level)
>  {
>      /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
>      uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
> @@ -111,13 +108,13 @@ static void xive_tctx_notify(XiveTCTX *tctx, uint8_t 
> ring)
>      if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
>          switch (ring) {
>          case TM_QW1_OS:
> -            regs[TM_NSR] |= TM_QW1_NSR_EO;
> +            regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F);
>              break;
>          case TM_QW2_HV_POOL:
> -            alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6);
> +            alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 
> 0x3F);
>              break;
>          case TM_QW3_HV_PHYS:
> -            regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
> +            regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F);
>              break;
>          default:
>              g_assert_not_reached();


The big difference between presenting group and VP directed is that
VP can just be queued up in IPB, whereas group can not be, and must
be redistributed before they are precluded by a different interrupt.
So I wonder if we should assert if there is an existing group interrupt
in NSR being overwritten at this point.

Also should we be masking the group level here? Maybe just assert the
top 2 bits are clear, otherwise something has gone wrong if this is
chopping off bits here.

> @@ -159,7 +156,7 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t 
> ring, uint8_t cppr)
>       * Recompute the PIPR based on local pending interrupts.  The PHYS
>       * ring must take the minimum of both the PHYS and POOL PIPR values.
>       */
> -    pipr_min = ipb_to_pipr(regs[TM_IPB]);
> +    pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
>      ring_min = ring;
>  
>      /* PHYS updates also depend on POOL values */
> @@ -169,7 +166,7 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t 
> ring, uint8_t cppr)
>          /* POOL values only matter if POOL ctx is valid */
>          if (pool_regs[TM_WORD2] & 0x80) {
>  
> -            uint8_t pool_pipr = ipb_to_pipr(pool_regs[TM_IPB]);
> +            uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]);
>  
>              /*
>               * Determine highest priority interrupt and

Moving this function and changing ipb->pipr (before adding group) could
be split into its own patch, since the mechanical changes seem to be
the biggest part, would make the group change simpler to see.

> @@ -185,17 +182,27 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t 
> ring, uint8_t cppr)
>      regs[TM_PIPR] = pipr_min;
>  
>      /* CPPR has changed, check if we need to raise a pending exception */
> -    xive_tctx_notify(tctx, ring_min);
> +    xive_tctx_notify(tctx, ring_min, 0);
>  }
>  
> -void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
> -{
> +void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
> +                           uint8_t group_level)
> + {
> +    /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
> +    uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
> +    uint8_t *alt_regs = &tctx->regs[alt_ring];
>      uint8_t *regs = &tctx->regs[ring];
>  
> -    regs[TM_IPB] |= ipb;
> -    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
> -    xive_tctx_notify(tctx, ring);
> -}
> +    if (group_level == 0) {
> +        /* VP-specific */
> +        regs[TM_IPB] |= xive_priority_to_ipb(priority);
> +        alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]);
> +    } else {
> +        /* VP-group */
> +        alt_regs[TM_PIPR] = xive_priority_to_pipr(priority);
> +    }
> +    xive_tctx_notify(tctx, ring, group_level);
> + }
>  
>  /*
>   * XIVE Thread Interrupt Management Area (TIMA)
> @@ -411,13 +418,13 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, 
> XiveTCTX *tctx,
>  }
>  
>  /*
> - * Adjust the IPB to allow a CPU to process event queues of other
> + * Adjust the PIPR to allow a CPU to process event queues of other
>   * priorities during one physical interrupt cycle.
>   */
>  static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
>                                     hwaddr offset, uint64_t value, unsigned 
> size)
>  {
> -    xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 
> 0xff));
> +    xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0);
>  }
>  
>  static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
> @@ -495,16 +502,20 @@ static void xive_tctx_need_resend(XiveRouter *xrtr, 
> XiveTCTX *tctx,
>          /* Reset the NVT value */
>          nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
>          xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
> -    }
> +
> +        uint8_t *regs = &tctx->regs[TM_QW1_OS];
> +        regs[TM_IPB] |= ipb;
> +}
> +

Whitespace damage here?

>      /*
> -     * Always call xive_tctx_ipb_update(). Even if there were no
> +     * Always call xive_tctx_pipr_update(). Even if there were no
>       * escalation triggered, there could be a pending interrupt which
>       * was saved when the context was pulled and that we need to take
>       * into account by recalculating the PIPR (which is not
>       * saved/restored).
>       * It will also raise the External interrupt signal if needed.
>       */
> -    xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
> +    xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */

I don't understand what's going on here. Why not ipb_to_pipr(ipb)?

>  }
>  
>  /*
> @@ -841,9 +852,9 @@ void xive_tctx_reset(XiveTCTX *tctx)
>       * CPPR is first set.
>       */
>      tctx->regs[TM_QW1_OS + TM_PIPR] =
> -        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
> +        xive_ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
>      tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
> -        ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
> +        xive_ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
>  }
>  
>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
> @@ -1660,6 +1671,12 @@ static uint32_t xive_tctx_hw_cam_line(XivePresenter 
> *xptr, XiveTCTX *tctx)
>      return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
>  }
>  
> +static uint8_t xive_get_group_level(uint32_t nvp_index)
> +{
> +    /* FIXME add crowd encoding */
> +    return ctz32(~nvp_index) + 1;
> +}
> +
>  /*
>   * The thread context register words are in big-endian format.
>   */
> @@ -1745,6 +1762,7 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t 
> format,
>  {
>      XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
>      XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
> +    uint8_t group_level;
>      int count;
>  
>      /*
> @@ -1758,9 +1776,9 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t 
> format,
>  
>      /* handle CPU exception delivery */
>      if (count) {
> -        trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
> -        xive_tctx_ipb_update(match.tctx, match.ring,
> -                             xive_priority_to_ipb(priority));
> +        group_level = cam_ignore ? xive_get_group_level(nvt_idx) : 0;
> +        trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, 
> group_level);
> +        xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level);
>      }
>  
>      return !!count;
> diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
> index 4adc3b6950..db372f4b30 100644
> --- a/hw/intc/xive2.c
> +++ b/hw/intc/xive2.c
> @@ -564,8 +564,10 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, 
> XiveTCTX *tctx,
>                                     uint8_t nvp_blk, uint32_t nvp_idx,
>                                     bool do_restore)
>  {
> +    uint8_t ipb, backlog_level;
> +    uint8_t backlog_prio;
> +    uint8_t *regs = &tctx->regs[TM_QW1_OS];
>      Xive2Nvp nvp;
> -    uint8_t ipb;

Put the uint8_ts all on the same line or keep them all on different
lines?

Thanks,
Nick

>  
>      /*
>       * Grab the associated thread interrupt context registers in the
> @@ -594,15 +596,15 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, 
> XiveTCTX *tctx,
>          nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, 0);
>          xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
>      }
> +    regs[TM_IPB] = ipb;
> +    backlog_prio = xive_ipb_to_pipr(ipb);
> +    backlog_level = 0;
> +
>      /*
> -     * Always call xive_tctx_ipb_update(). Even if there were no
> -     * escalation triggered, there could be a pending interrupt which
> -     * was saved when the context was pulled and that we need to take
> -     * into account by recalculating the PIPR (which is not
> -     * saved/restored).
> -     * It will also raise the External interrupt signal if needed.
> +     * Compute the PIPR based on the restored state.
> +     * It will raise the External interrupt signal if needed.
>       */
> -    xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
> +    xive_tctx_pipr_update(tctx, TM_QW1_OS, backlog_prio, backlog_level);
>  }
>  
>  /*
> diff --git a/hw/intc/trace-events b/hw/intc/trace-events
> index 3dcf147198..7435728c51 100644
> --- a/hw/intc/trace-events
> +++ b/hw/intc/trace-events
> @@ -282,7 +282,7 @@ xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, 
> uint32_t end_data) "EN
>  xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, 
> uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 
> 0x%02x/0x%04x data 0x%08x"
>  xive_tctx_tm_write(uint32_t index, uint64_t offset, unsigned int size, 
> uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64
>  xive_tctx_tm_read(uint32_t index, uint64_t offset, unsigned int size, 
> uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64
> -xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring) 
> "found NVT 0x%x/0x%x ring=0x%x"
> +xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring, 
> uint8_t group_level) "found NVT 0x%x/0x%x ring=0x%x group_level=%d"
>  xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 
> 0x%x/0x%x @0x%"PRIx64
>  
>  # pnv_xive.c




reply via email to

[Prev in Thread] Current Thread [Next in Thread]