qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v3 1/4] ppc/xics: Make the ICSState a list


From: David Gibson
Subject: Re: [Qemu-devel] [PATCH v3 1/4] ppc/xics: Make the ICSState a list
Date: Fri, 8 Jul 2016 15:16:09 +1000
User-agent: Mutt/1.6.1 (2016-04-27)

On Fri, Jul 08, 2016 at 10:20:32AM +0530, Nikunj A Dadhania wrote:
> David Gibson <address@hidden> writes:
> 
> > [ Unknown signature status ]
> > On Thu, Jul 07, 2016 at 11:24:15PM +0530, Nikunj A Dadhania wrote:
> >> From: Benjamin Herrenschmidt <address@hidden>
> >> 
> >> Instead of an array of fixed sized blocks, use a list, as we will need
> >> to have sources with variable number of interrupts. SPAPR only uses
> >> a single entry. Native will create more. If performance becomes an
> >> issue we can add some hashed lookup but for now this will do fine.
> >> 
> >> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
> >> [ move the initialization of list to xics_common_initfn,
> >>   restore xirr_owner after migration and move restoring to
> >>   icp_post_load]
> >> Signed-off-by: Nikunj A Dadhania <address@hidden>
> >> ---
> >>  hw/intc/trace-events  |   5 +-
> >>  hw/intc/xics.c        | 134 
> >> ++++++++++++++++++++++++++++++++------------------
> >>  hw/intc/xics_kvm.c    |  27 +++++++---
> >>  hw/intc/xics_spapr.c  |  88 +++++++++++++++++++++------------
> >>  hw/ppc/spapr_events.c |   2 +-
> >>  hw/ppc/spapr_pci.c    |   5 +-
> >>  hw/ppc/spapr_vio.c    |   2 +-
> >>  include/hw/ppc/xics.h |  13 ++---
> >>  8 files changed, 175 insertions(+), 101 deletions(-)
> >> 
> >> diff --git a/hw/intc/trace-events b/hw/intc/trace-events
> >> index 376dd18..41ced33 100644
> >> --- a/hw/intc/trace-events
> >> +++ b/hw/intc/trace-events
> >> @@ -56,10 +56,11 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: 
> >> srcno %d [irq %#x]"
> >>  xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) 
> >> "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
> >>  xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
> >>  xics_ics_eoi(int nr) "ics_eoi: irq %#x"
> >> -xics_alloc(int src, int irq) "source#%d, irq %d"
> >> -xics_alloc_block(int src, int first, int num, bool lsi, int align) 
> >> "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
> >> +xics_alloc(int irq) "irq %d"
> >> +xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d, 
> >> %d irqs, lsi=%d, alignnum %d"
> >>  xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d 
> >> irqs"
> >>  xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
> >> +xics_icp_post_load(uint32_t server_no, uint32_t xirr, uint64_t addr, 
> >> uint8_t pend) "server_no %d, xirr %#x, xirr_owner 0x%" PRIx64 ", pending 
> >> %d"
> >>  
> >>  # hw/intc/s390_flic_kvm.c
> >>  flic_create_device(int err) "flic: create device failed %d"
> >> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> >> index cd48f42..0af05c9 100644
> >> --- a/hw/intc/xics.c
> >> +++ b/hw/intc/xics.c
> >> @@ -32,6 +32,7 @@
> >>  #include "hw/hw.h"
> >>  #include "trace.h"
> >>  #include "qemu/timer.h"
> >> +#include "hw/ppc/spapr.h"
> >>  #include "hw/ppc/xics.h"
> >>  #include "qemu/error-report.h"
> >>  #include "qapi/visitor.h"
> >> @@ -96,13 +97,16 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
> >>  static void xics_common_reset(DeviceState *d)
> >>  {
> >>      XICSState *xics = XICS_COMMON(d);
> >> +    ICSState *ics;
> >>      int i;
> >>  
> >>      for (i = 0; i < xics->nr_servers; i++) {
> >>          device_reset(DEVICE(&xics->ss[i]));
> >>      }
> >>  
> >> -    device_reset(DEVICE(xics->ics));
> >> +    QLIST_FOREACH(ics, &xics->ics, list) {
> >> +        device_reset(DEVICE(ics));
> >> +    }
> >>  }
> >>  
> >>  static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char 
> >> *name,
> >> @@ -134,7 +138,6 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor 
> >> *v, const char *name,
> >>      }
> >>  
> >>      assert(info->set_nr_irqs);
> >> -    assert(xics->ics);
> >>      info->set_nr_irqs(xics, value, errp);
> >>  }
> >>  
> >> @@ -174,6 +177,9 @@ static void xics_prop_set_nr_servers(Object *obj, 
> >> Visitor *v,
> >>  
> >>  static void xics_common_initfn(Object *obj)
> >>  {
> >> +    XICSState *xics = XICS_COMMON(obj);
> >> +
> >> +    QLIST_INIT(&xics->ics);
> >>      object_property_add(obj, "nr_irqs", "int",
> >>                          xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
> >>                          NULL, NULL, NULL);
> >> @@ -212,33 +218,35 @@ static void ics_reject(ICSState *ics, int nr);
> >>  static void ics_resend(ICSState *ics);
> >>  static void ics_eoi(ICSState *ics, int nr);
> >>  
> >> -static void icp_check_ipi(XICSState *xics, int server)
> >> +static void icp_check_ipi(ICPState *ss)
> >>  {
> >> -    ICPState *ss = xics->ss + server;
> >> -
> >>      if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
> >>          return;
> >>      }
> >>  
> >> -    trace_xics_icp_check_ipi(server, ss->mfrr);
> >> +    trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
> >
> > The dt_id might be more useful here, given the changes that are going
> > on in that area.  But it's not worth holding up this series just for
> > that.
> 
> Yes, I am seeing those changes, will adjust accordingly if that goes
> first.
> 
> >
> >>  
> >> -    if (XISR(ss)) {
> >> -        ics_reject(xics->ics, XISR(ss));
> >> +    if (XISR(ss) && ss->xirr_owner) {
> >> +        ics_reject(ss->xirr_owner, XISR(ss));
> >>      }
> >>  
> >>      ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
> >>      ss->pending_priority = ss->mfrr;
> >> +    ss->xirr_owner = NULL;
> >>      qemu_irq_raise(ss->output);
> >>  }
> >>  
> >>  static void icp_resend(XICSState *xics, int server)
> >>  {
> >>      ICPState *ss = xics->ss + server;
> >> +    ICSState *ics;
> >
> > Possibly this should take icp instead of xics for consistency.  Or
> > else be renamed xics_resend().  But that can be a cleanup for another
> > time.
> >
> >>      if (ss->mfrr < CPPR(ss)) {
> >> -        icp_check_ipi(xics, server);
> >> +        icp_check_ipi(ss);
> >> +    }
> >> +    QLIST_FOREACH(ics, &xics->ics, list) {
> >> +        ics_resend(ics);
> >>      }
> >> -    ics_resend(xics->ics);
> >>  }
> >>  
> >>  void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
> >> @@ -256,7 +264,10 @@ void icp_set_cppr(XICSState *xics, int server, 
> >> uint8_t cppr)
> >>              ss->xirr &= ~XISR_MASK; /* Clear XISR */
> >>              ss->pending_priority = 0xff;
> >>              qemu_irq_lower(ss->output);
> >> -            ics_reject(xics->ics, old_xisr);
> >> +            if (ss->xirr_owner) {
> >> +                ics_reject(ss->xirr_owner, old_xisr);
> >> +                ss->xirr_owner = NULL;
> >> +            }
> >>          }
> >>      } else {
> >>          if (!XISR(ss)) {
> >> @@ -271,7 +282,7 @@ void icp_set_mfrr(XICSState *xics, int server, uint8_t 
> >> mfrr)
> >>  
> >>      ss->mfrr = mfrr;
> >>      if (mfrr < CPPR(ss)) {
> >> -        icp_check_ipi(xics, server);
> >> +        icp_check_ipi(ss);
> >>      }
> >>  }
> >>  
> >> @@ -282,6 +293,7 @@ uint32_t icp_accept(ICPState *ss)
> >>      qemu_irq_lower(ss->output);
> >>      ss->xirr = ss->pending_priority << 24;
> >>      ss->pending_priority = 0xff;
> >> +    ss->xirr_owner = NULL;
> >>  
> >>      trace_xics_icp_accept(xirr, ss->xirr);
> >>  
> >> @@ -299,30 +311,40 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
> >>  void icp_eoi(XICSState *xics, int server, uint32_t xirr)
> >>  {
> >>      ICPState *ss = xics->ss + server;
> >> +    ICSState *ics;
> >> +    uint32_t irq;
> >>  
> >>      /* Send EOI -> ICS */
> >>      ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
> >>      trace_xics_icp_eoi(server, xirr, ss->xirr);
> >> -    ics_eoi(xics->ics, xirr & XISR_MASK);
> >> +    irq = xirr & XISR_MASK;
> >> +    QLIST_FOREACH(ics, &xics->ics, list) {
> >> +        if (ics_valid_irq(ics, irq)) {
> >> +            ics_eoi(ics, irq);
> >
> > You could slightly optimize this by adding a break; here, but again
> > that can be a tweak for another time.
> 
> Sure.
> 
> >
> >> +        }
> >> +    }
> >>      if (!XISR(ss)) {
> >>          icp_resend(xics, server);
> >>      }
> >>  }
> >>  
> >> -static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority)
> >> +static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
> >>  {
> >> +    XICSState *xics = ics->xics;
> >>      ICPState *ss = xics->ss + server;
> >
> > It's kind of weird for an icp_() function to take an ics but not an
> > icp.  It might make more sense for it to take both, even though
> > obviously it can be derived from just the ics.  Again, won't hold up
> > the series for this.
> >
> >>      trace_xics_icp_irq(server, nr, priority);
> >>  
> >>      if ((priority >= CPPR(ss))
> >>          || (XISR(ss) && (ss->pending_priority <= priority))) {
> >> -        ics_reject(xics->ics, nr);
> >> +        ics_reject(ics, nr);
> >>      } else {
> >> -        if (XISR(ss)) {
> >> -            ics_reject(xics->ics, XISR(ss));
> >> +        if (XISR(ss) && ss->xirr_owner) {
> >> +            ics_reject(ss->xirr_owner, XISR(ss));
> >> +            ss->xirr_owner = NULL;
> >>          }
> >>          ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
> >> +        ss->xirr_owner = ics;
> >>          ss->pending_priority = priority;
> >>          trace_xics_icp_raise(ss->xirr, ss->pending_priority);
> >>          qemu_irq_raise(ss->output);
> >> @@ -378,12 +400,42 @@ static void icp_reset(DeviceState *dev)
> >>      qemu_set_irq(icp->output, 0);
> >>  }
> >>  
> >> +static int icp_post_load(ICPState *ss, int version_id)
> >> +{
> >> +    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> +    XICSState *xics = spapr->xics;
> >> +    uint32_t irq, i;
> >> +    static uint32_t server_no = 0;
> >
> > Eugh.. static locals are usually horrid.
> >
> >> +    server_no++;
> >> +    irq = XISR(ss);
> >> +    if (!ss->cs || !irq)
> >> +        goto icpend;
> >
> > That's an awful use of a goto - inverting the if sense is clearer and
> > no longer.
> 
> Sure.
> 
> >
> >> +
> >> +    /* Restore the xirr_owner */
> >> +    ss->xirr_owner = xics_find_source(xics, irq);
> >
> > Do we actually need this?  Have you confirmed that replaying the
> > icp_resend()s won't already accomplish this?
> 
> Yes, i have had migration hang on the target. Other thing its only
> affecting "kernel-irqchip=off"

Hrm.. It would be good to understand exactly what is going on here so
we can be confident we've solved it correctly.


> >> + icpend:
> >> +    trace_xics_icp_post_load(server_no, ss->xirr, 
> >> (uint64_t)ss->xirr_owner, ss->pending_priority);
> >> +    if (xics->nr_servers != server_no)
> >> +        return 0;
> >> +
> >> +    /* All the ICPs are processed, not restore all the state */
> >> +    for (i = 0; i < xics->nr_servers; i++) {
> >> +        icp_resend(xics, i);
> >> +    }
> >> +    server_no = 0;
> >
> > Eugh.  This is really ugly.  It really looks like we need some
> > migration state on the overall xics, not just the component icps and
> > icss.  Dealing with backwards compatibility will be hairy, but I
> > really hope we can do better than this hack.
> 
> Earlier icp_resend() was called from ics_post_load() path, and
> icp_post_load gets called after ics_post_load, the xirr_owner is not
> restored during ics_post_load.
> 
> If we can get icp_post_load called before ics_post_load, we would not
> need the above code. Or as you suggested somehere above this.

Hm, ok.  I'm wondering if we can move most of the post_load logic to a
top-level xicsstate structure.  I think the migration layer guarantees
that parent objects have their post_load called after all the
subordinate objects have their state restore.

Migration compat could be tricky.  Since I don't think we need any
actual associated with the top-level object it should be possible in
theory, but I'm not sure if there's inbuilt support for an object with
no on-wire presence, but just a post_load handler.

> 
> >
> >> +    return 0;
> >> +}
> >> +
> >>  static void icp_class_init(ObjectClass *klass, void *data)
> >>  {
> >>      DeviceClass *dc = DEVICE_CLASS(klass);
> >> +    ICPStateClass *icpc = ICP_CLASS(klass);
> >>  
> >>      dc->reset = icp_reset;
> >>      dc->vmsd = &vmstate_icp_server;
> >> +    icpc->post_load = icp_post_load;
> >>  }
> >>  
> >>  static const TypeInfo icp_info = {
> >> @@ -405,8 +457,7 @@ static void resend_msi(ICSState *ics, int srcno)
> >>      if (irq->status & XICS_STATUS_REJECTED) {
> >>          irq->status &= ~XICS_STATUS_REJECTED;
> >>          if (irq->priority != 0xff) {
> >> -            icp_irq(ics->xics, irq->server, srcno + ics->offset,
> >> -                    irq->priority);
> >> +            icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >>          }
> >>      }
> >>  }
> >> @@ -419,7 +470,7 @@ static void resend_lsi(ICSState *ics, int srcno)
> >>          && (irq->status & XICS_STATUS_ASSERTED)
> >>          && !(irq->status & XICS_STATUS_SENT)) {
> >>          irq->status |= XICS_STATUS_SENT;
> >> -        icp_irq(ics->xics, irq->server, srcno + ics->offset, 
> >> irq->priority);
> >> +        icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >>      }
> >>  }
> >>  
> >> @@ -434,7 +485,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int 
> >> val)
> >>              irq->status |= XICS_STATUS_MASKED_PENDING;
> >>              trace_xics_masked_pending();
> >>          } else  {
> >> -            icp_irq(ics->xics, irq->server, srcno + ics->offset, 
> >> irq->priority);
> >> +            icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >>          }
> >>      }
> >>  }
> >> @@ -473,7 +524,7 @@ static void write_xive_msi(ICSState *ics, int srcno)
> >>      }
> >>  
> >>      irq->status &= ~XICS_STATUS_MASKED_PENDING;
> >> -    icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
> >> +    icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >>  }
> >>  
> >>  static void write_xive_lsi(ICSState *ics, int srcno)
> >> @@ -505,8 +556,11 @@ static void ics_reject(ICSState *ics, int nr)
> >>      ICSIRQState *irq = ics->irqs + nr - ics->offset;
> >>  
> >>      trace_xics_ics_reject(nr, nr - ics->offset);
> >> -    irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for 
> >> LSI */
> >> -    irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI 
> >> */
> >> +    if (irq->flags & XICS_FLAGS_IRQ_MSI) {
> >> +      irq->status |= XICS_STATUS_REJECTED;
> >> +    } else {
> >> +      irq->status &= ~XICS_STATUS_SENT;
> >> +    }
> >
> > This change appears unrelated to the rest.  If not that needs an 
> > explanation.
> 
> I was seeing inconsistent status because of this in the trace logs, like
> for LSI status as 0x7, i.e. XICS_STATUS_ASSERTED, XICS_STATUS_SENT and
> XICS_STATUS_REJECTED all set, which did not make sense. So the REJECTED
> would have been set in earlier interrupt cycle, and then asserted and
> sent in this current one.

Ah, ok.  Can you split this out into a separate patch then, with an
explanation that it's to remove confusing debug output?

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]