[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH v3 1/4] ppc/xics: Make the ICSState a list
From: |
David Gibson |
Subject: |
Re: [Qemu-ppc] [PATCH v3 1/4] ppc/xics: Make the ICSState a list |
Date: |
Fri, 8 Jul 2016 15:16:09 +1000 |
User-agent: |
Mutt/1.6.1 (2016-04-27) |
On Fri, Jul 08, 2016 at 10:20:32AM +0530, Nikunj A Dadhania wrote:
> David Gibson <address@hidden> writes:
>
> > [ Unknown signature status ]
> > On Thu, Jul 07, 2016 at 11:24:15PM +0530, Nikunj A Dadhania wrote:
> >> From: Benjamin Herrenschmidt <address@hidden>
> >>
> >> Instead of an array of fixed sized blocks, use a list, as we will need
> >> to have sources with variable number of interrupts. SPAPR only uses
> >> a single entry. Native will create more. If performance becomes an
> >> issue we can add some hashed lookup but for now this will do fine.
> >>
> >> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
> >> [ move the initialization of list to xics_common_initfn,
> >> restore xirr_owner after migration and move restoring to
> >> icp_post_load]
> >> Signed-off-by: Nikunj A Dadhania <address@hidden>
> >> ---
> >> hw/intc/trace-events | 5 +-
> >> hw/intc/xics.c | 134
> >> ++++++++++++++++++++++++++++++++------------------
> >> hw/intc/xics_kvm.c | 27 +++++++---
> >> hw/intc/xics_spapr.c | 88 +++++++++++++++++++++------------
> >> hw/ppc/spapr_events.c | 2 +-
> >> hw/ppc/spapr_pci.c | 5 +-
> >> hw/ppc/spapr_vio.c | 2 +-
> >> include/hw/ppc/xics.h | 13 ++---
> >> 8 files changed, 175 insertions(+), 101 deletions(-)
> >>
> >> diff --git a/hw/intc/trace-events b/hw/intc/trace-events
> >> index 376dd18..41ced33 100644
> >> --- a/hw/intc/trace-events
> >> +++ b/hw/intc/trace-events
> >> @@ -56,10 +56,11 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi:
> >> srcno %d [irq %#x]"
> >> xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority)
> >> "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
> >> xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
> >> xics_ics_eoi(int nr) "ics_eoi: irq %#x"
> >> -xics_alloc(int src, int irq) "source#%d, irq %d"
> >> -xics_alloc_block(int src, int first, int num, bool lsi, int align)
> >> "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
> >> +xics_alloc(int irq) "irq %d"
> >> +xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d,
> >> %d irqs, lsi=%d, alignnum %d"
> >> xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d
> >> irqs"
> >> xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
> >> +xics_icp_post_load(uint32_t server_no, uint32_t xirr, uint64_t addr,
> >> uint8_t pend) "server_no %d, xirr %#x, xirr_owner 0x%" PRIx64 ", pending
> >> %d"
> >>
> >> # hw/intc/s390_flic_kvm.c
> >> flic_create_device(int err) "flic: create device failed %d"
> >> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> >> index cd48f42..0af05c9 100644
> >> --- a/hw/intc/xics.c
> >> +++ b/hw/intc/xics.c
> >> @@ -32,6 +32,7 @@
> >> #include "hw/hw.h"
> >> #include "trace.h"
> >> #include "qemu/timer.h"
> >> +#include "hw/ppc/spapr.h"
> >> #include "hw/ppc/xics.h"
> >> #include "qemu/error-report.h"
> >> #include "qapi/visitor.h"
> >> @@ -96,13 +97,16 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
> >> static void xics_common_reset(DeviceState *d)
> >> {
> >> XICSState *xics = XICS_COMMON(d);
> >> + ICSState *ics;
> >> int i;
> >>
> >> for (i = 0; i < xics->nr_servers; i++) {
> >> device_reset(DEVICE(&xics->ss[i]));
> >> }
> >>
> >> - device_reset(DEVICE(xics->ics));
> >> + QLIST_FOREACH(ics, &xics->ics, list) {
> >> + device_reset(DEVICE(ics));
> >> + }
> >> }
> >>
> >> static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char
> >> *name,
> >> @@ -134,7 +138,6 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor
> >> *v, const char *name,
> >> }
> >>
> >> assert(info->set_nr_irqs);
> >> - assert(xics->ics);
> >> info->set_nr_irqs(xics, value, errp);
> >> }
> >>
> >> @@ -174,6 +177,9 @@ static void xics_prop_set_nr_servers(Object *obj,
> >> Visitor *v,
> >>
> >> static void xics_common_initfn(Object *obj)
> >> {
> >> + XICSState *xics = XICS_COMMON(obj);
> >> +
> >> + QLIST_INIT(&xics->ics);
> >> object_property_add(obj, "nr_irqs", "int",
> >> xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
> >> NULL, NULL, NULL);
> >> @@ -212,33 +218,35 @@ static void ics_reject(ICSState *ics, int nr);
> >> static void ics_resend(ICSState *ics);
> >> static void ics_eoi(ICSState *ics, int nr);
> >>
> >> -static void icp_check_ipi(XICSState *xics, int server)
> >> +static void icp_check_ipi(ICPState *ss)
> >> {
> >> - ICPState *ss = xics->ss + server;
> >> -
> >> if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
> >> return;
> >> }
> >>
> >> - trace_xics_icp_check_ipi(server, ss->mfrr);
> >> + trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
> >
> > The dt_id might be more useful here, given the changes that are going
> > on in that area. But it's not worth holding up this series just for
> > that.
>
> Yes, I am seeing those changes, will adjust accordingly if that goes
> first.
>
> >
> >>
> >> - if (XISR(ss)) {
> >> - ics_reject(xics->ics, XISR(ss));
> >> + if (XISR(ss) && ss->xirr_owner) {
> >> + ics_reject(ss->xirr_owner, XISR(ss));
> >> }
> >>
> >> ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
> >> ss->pending_priority = ss->mfrr;
> >> + ss->xirr_owner = NULL;
> >> qemu_irq_raise(ss->output);
> >> }
> >>
> >> static void icp_resend(XICSState *xics, int server)
> >> {
> >> ICPState *ss = xics->ss + server;
> >> + ICSState *ics;
> >
> > Possibly this should take icp instead of xics for consistency. Or
> > else be renamed xics_resend(). But that can be a cleanup for another
> > time.
> >
> >> if (ss->mfrr < CPPR(ss)) {
> >> - icp_check_ipi(xics, server);
> >> + icp_check_ipi(ss);
> >> + }
> >> + QLIST_FOREACH(ics, &xics->ics, list) {
> >> + ics_resend(ics);
> >> }
> >> - ics_resend(xics->ics);
> >> }
> >>
> >> void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
> >> @@ -256,7 +264,10 @@ void icp_set_cppr(XICSState *xics, int server,
> >> uint8_t cppr)
> >> ss->xirr &= ~XISR_MASK; /* Clear XISR */
> >> ss->pending_priority = 0xff;
> >> qemu_irq_lower(ss->output);
> >> - ics_reject(xics->ics, old_xisr);
> >> + if (ss->xirr_owner) {
> >> + ics_reject(ss->xirr_owner, old_xisr);
> >> + ss->xirr_owner = NULL;
> >> + }
> >> }
> >> } else {
> >> if (!XISR(ss)) {
> >> @@ -271,7 +282,7 @@ void icp_set_mfrr(XICSState *xics, int server, uint8_t
> >> mfrr)
> >>
> >> ss->mfrr = mfrr;
> >> if (mfrr < CPPR(ss)) {
> >> - icp_check_ipi(xics, server);
> >> + icp_check_ipi(ss);
> >> }
> >> }
> >>
> >> @@ -282,6 +293,7 @@ uint32_t icp_accept(ICPState *ss)
> >> qemu_irq_lower(ss->output);
> >> ss->xirr = ss->pending_priority << 24;
> >> ss->pending_priority = 0xff;
> >> + ss->xirr_owner = NULL;
> >>
> >> trace_xics_icp_accept(xirr, ss->xirr);
> >>
> >> @@ -299,30 +311,40 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
> >> void icp_eoi(XICSState *xics, int server, uint32_t xirr)
> >> {
> >> ICPState *ss = xics->ss + server;
> >> + ICSState *ics;
> >> + uint32_t irq;
> >>
> >> /* Send EOI -> ICS */
> >> ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
> >> trace_xics_icp_eoi(server, xirr, ss->xirr);
> >> - ics_eoi(xics->ics, xirr & XISR_MASK);
> >> + irq = xirr & XISR_MASK;
> >> + QLIST_FOREACH(ics, &xics->ics, list) {
> >> + if (ics_valid_irq(ics, irq)) {
> >> + ics_eoi(ics, irq);
> >
> > You could slightly optimize this by adding a break; here, but again
> > that can be a tweak for another time.
>
> Sure.
>
> >
> >> + }
> >> + }
> >> if (!XISR(ss)) {
> >> icp_resend(xics, server);
> >> }
> >> }
> >>
> >> -static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority)
> >> +static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
> >> {
> >> + XICSState *xics = ics->xics;
> >> ICPState *ss = xics->ss + server;
> >
> > It's kind of weird for an icp_() function to take an ics but not an
> > icp. It might make more sense for it to take both, even though
> > obviously it can be derived from just the ics. Again, won't hold up
> > the series for this.
> >
> >> trace_xics_icp_irq(server, nr, priority);
> >>
> >> if ((priority >= CPPR(ss))
> >> || (XISR(ss) && (ss->pending_priority <= priority))) {
> >> - ics_reject(xics->ics, nr);
> >> + ics_reject(ics, nr);
> >> } else {
> >> - if (XISR(ss)) {
> >> - ics_reject(xics->ics, XISR(ss));
> >> + if (XISR(ss) && ss->xirr_owner) {
> >> + ics_reject(ss->xirr_owner, XISR(ss));
> >> + ss->xirr_owner = NULL;
> >> }
> >> ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
> >> + ss->xirr_owner = ics;
> >> ss->pending_priority = priority;
> >> trace_xics_icp_raise(ss->xirr, ss->pending_priority);
> >> qemu_irq_raise(ss->output);
> >> @@ -378,12 +400,42 @@ static void icp_reset(DeviceState *dev)
> >> qemu_set_irq(icp->output, 0);
> >> }
> >>
> >> +static int icp_post_load(ICPState *ss, int version_id)
> >> +{
> >> + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> + XICSState *xics = spapr->xics;
> >> + uint32_t irq, i;
> >> + static uint32_t server_no = 0;
> >
> > Eugh.. static locals are usually horrid.
> >
> >> + server_no++;
> >> + irq = XISR(ss);
> >> + if (!ss->cs || !irq)
> >> + goto icpend;
> >
> > That's an awful use of a goto - inverting the if sense is clearer and
> > no longer.
>
> Sure.
>
> >
> >> +
> >> + /* Restore the xirr_owner */
> >> + ss->xirr_owner = xics_find_source(xics, irq);
> >
> > Do we actually need this? Have you confirmed that replaying the
> > icp_resend()s won't already accomplish this?
>
> Yes, i have had migration hang on the target. Other thing its only
> affecting "kernel-irqchip=off"
Hrm.. It would be good to understand exactly what is going on here so
we can be confident we've solved it correctly.
> >> + icpend:
> >> + trace_xics_icp_post_load(server_no, ss->xirr,
> >> (uint64_t)ss->xirr_owner, ss->pending_priority);
> >> + if (xics->nr_servers != server_no)
> >> + return 0;
> >> +
> >> + /* All the ICPs are processed, not restore all the state */
> >> + for (i = 0; i < xics->nr_servers; i++) {
> >> + icp_resend(xics, i);
> >> + }
> >> + server_no = 0;
> >
> > Eugh. This is really ugly. It really looks like we need some
> > migration state on the overall xics, not just the component icps and
> > icss. Dealing with backwards compatibility will be hairy, but I
> > really hope we can do better than this hack.
>
> Earlier icp_resend() was called from ics_post_load() path, and
> icp_post_load gets called after ics_post_load, the xirr_owner is not
> restored during ics_post_load.
>
> If we can get icp_post_load called before ics_post_load, we would not
> need the above code. Or as you suggested somehere above this.
Hm, ok. I'm wondering if we can move most of the post_load logic to a
top-level xicsstate structure. I think the migration layer guarantees
that parent objects have their post_load called after all the
subordinate objects have their state restore.
Migration compat could be tricky. Since I don't think we need any
actual associated with the top-level object it should be possible in
theory, but I'm not sure if there's inbuilt support for an object with
no on-wire presence, but just a post_load handler.
>
> >
> >> + return 0;
> >> +}
> >> +
> >> static void icp_class_init(ObjectClass *klass, void *data)
> >> {
> >> DeviceClass *dc = DEVICE_CLASS(klass);
> >> + ICPStateClass *icpc = ICP_CLASS(klass);
> >>
> >> dc->reset = icp_reset;
> >> dc->vmsd = &vmstate_icp_server;
> >> + icpc->post_load = icp_post_load;
> >> }
> >>
> >> static const TypeInfo icp_info = {
> >> @@ -405,8 +457,7 @@ static void resend_msi(ICSState *ics, int srcno)
> >> if (irq->status & XICS_STATUS_REJECTED) {
> >> irq->status &= ~XICS_STATUS_REJECTED;
> >> if (irq->priority != 0xff) {
> >> - icp_irq(ics->xics, irq->server, srcno + ics->offset,
> >> - irq->priority);
> >> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >> }
> >> }
> >> }
> >> @@ -419,7 +470,7 @@ static void resend_lsi(ICSState *ics, int srcno)
> >> && (irq->status & XICS_STATUS_ASSERTED)
> >> && !(irq->status & XICS_STATUS_SENT)) {
> >> irq->status |= XICS_STATUS_SENT;
> >> - icp_irq(ics->xics, irq->server, srcno + ics->offset,
> >> irq->priority);
> >> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >> }
> >> }
> >>
> >> @@ -434,7 +485,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int
> >> val)
> >> irq->status |= XICS_STATUS_MASKED_PENDING;
> >> trace_xics_masked_pending();
> >> } else {
> >> - icp_irq(ics->xics, irq->server, srcno + ics->offset,
> >> irq->priority);
> >> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >> }
> >> }
> >> }
> >> @@ -473,7 +524,7 @@ static void write_xive_msi(ICSState *ics, int srcno)
> >> }
> >>
> >> irq->status &= ~XICS_STATUS_MASKED_PENDING;
> >> - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
> >> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
> >> }
> >>
> >> static void write_xive_lsi(ICSState *ics, int srcno)
> >> @@ -505,8 +556,11 @@ static void ics_reject(ICSState *ics, int nr)
> >> ICSIRQState *irq = ics->irqs + nr - ics->offset;
> >>
> >> trace_xics_ics_reject(nr, nr - ics->offset);
> >> - irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for
> >> LSI */
> >> - irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI
> >> */
> >> + if (irq->flags & XICS_FLAGS_IRQ_MSI) {
> >> + irq->status |= XICS_STATUS_REJECTED;
> >> + } else {
> >> + irq->status &= ~XICS_STATUS_SENT;
> >> + }
> >
> > This change appears unrelated to the rest. If not that needs an
> > explanation.
>
> I was seeing inconsistent status because of this in the trace logs, like
> for LSI status as 0x7, i.e. XICS_STATUS_ASSERTED, XICS_STATUS_SENT and
> XICS_STATUS_REJECTED all set, which did not make sense. So the REJECTED
> would have been set in earlier interrupt cycle, and then asserted and
> sent in this current one.
Ah, ok. Can you split this out into a separate patch then, with an
explanation that it's to remove confusing debug output?
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature