[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH v3 1/4] ppc/xics: Make the ICSState a list
From: |
Nikunj A Dadhania |
Subject: |
Re: [Qemu-ppc] [PATCH v3 1/4] ppc/xics: Make the ICSState a list |
Date: |
Fri, 08 Jul 2016 10:20:32 +0530 |
User-agent: |
Notmuch/0.21 (https://notmuchmail.org) Emacs/25.0.94.1 (x86_64-redhat-linux-gnu) |
David Gibson <address@hidden> writes:
> [ Unknown signature status ]
> On Thu, Jul 07, 2016 at 11:24:15PM +0530, Nikunj A Dadhania wrote:
>> From: Benjamin Herrenschmidt <address@hidden>
>>
>> Instead of an array of fixed sized blocks, use a list, as we will need
>> to have sources with variable number of interrupts. SPAPR only uses
>> a single entry. Native will create more. If performance becomes an
>> issue we can add some hashed lookup but for now this will do fine.
>>
>> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
>> [ move the initialization of list to xics_common_initfn,
>> restore xirr_owner after migration and move restoring to
>> icp_post_load]
>> Signed-off-by: Nikunj A Dadhania <address@hidden>
>> ---
>> hw/intc/trace-events | 5 +-
>> hw/intc/xics.c | 134
>> ++++++++++++++++++++++++++++++++------------------
>> hw/intc/xics_kvm.c | 27 +++++++---
>> hw/intc/xics_spapr.c | 88 +++++++++++++++++++++------------
>> hw/ppc/spapr_events.c | 2 +-
>> hw/ppc/spapr_pci.c | 5 +-
>> hw/ppc/spapr_vio.c | 2 +-
>> include/hw/ppc/xics.h | 13 ++---
>> 8 files changed, 175 insertions(+), 101 deletions(-)
>>
>> diff --git a/hw/intc/trace-events b/hw/intc/trace-events
>> index 376dd18..41ced33 100644
>> --- a/hw/intc/trace-events
>> +++ b/hw/intc/trace-events
>> @@ -56,10 +56,11 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno
>> %d [irq %#x]"
>> xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority)
>> "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
>> xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
>> xics_ics_eoi(int nr) "ics_eoi: irq %#x"
>> -xics_alloc(int src, int irq) "source#%d, irq %d"
>> -xics_alloc_block(int src, int first, int num, bool lsi, int align)
>> "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
>> +xics_alloc(int irq) "irq %d"
>> +xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d, %d
>> irqs, lsi=%d, alignnum %d"
>> xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs"
>> xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
>> +xics_icp_post_load(uint32_t server_no, uint32_t xirr, uint64_t addr,
>> uint8_t pend) "server_no %d, xirr %#x, xirr_owner 0x%" PRIx64 ", pending %d"
>>
>> # hw/intc/s390_flic_kvm.c
>> flic_create_device(int err) "flic: create device failed %d"
>> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
>> index cd48f42..0af05c9 100644
>> --- a/hw/intc/xics.c
>> +++ b/hw/intc/xics.c
>> @@ -32,6 +32,7 @@
>> #include "hw/hw.h"
>> #include "trace.h"
>> #include "qemu/timer.h"
>> +#include "hw/ppc/spapr.h"
>> #include "hw/ppc/xics.h"
>> #include "qemu/error-report.h"
>> #include "qapi/visitor.h"
>> @@ -96,13 +97,16 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
>> static void xics_common_reset(DeviceState *d)
>> {
>> XICSState *xics = XICS_COMMON(d);
>> + ICSState *ics;
>> int i;
>>
>> for (i = 0; i < xics->nr_servers; i++) {
>> device_reset(DEVICE(&xics->ss[i]));
>> }
>>
>> - device_reset(DEVICE(xics->ics));
>> + QLIST_FOREACH(ics, &xics->ics, list) {
>> + device_reset(DEVICE(ics));
>> + }
>> }
>>
>> static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name,
>> @@ -134,7 +138,6 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor
>> *v, const char *name,
>> }
>>
>> assert(info->set_nr_irqs);
>> - assert(xics->ics);
>> info->set_nr_irqs(xics, value, errp);
>> }
>>
>> @@ -174,6 +177,9 @@ static void xics_prop_set_nr_servers(Object *obj,
>> Visitor *v,
>>
>> static void xics_common_initfn(Object *obj)
>> {
>> + XICSState *xics = XICS_COMMON(obj);
>> +
>> + QLIST_INIT(&xics->ics);
>> object_property_add(obj, "nr_irqs", "int",
>> xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
>> NULL, NULL, NULL);
>> @@ -212,33 +218,35 @@ static void ics_reject(ICSState *ics, int nr);
>> static void ics_resend(ICSState *ics);
>> static void ics_eoi(ICSState *ics, int nr);
>>
>> -static void icp_check_ipi(XICSState *xics, int server)
>> +static void icp_check_ipi(ICPState *ss)
>> {
>> - ICPState *ss = xics->ss + server;
>> -
>> if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
>> return;
>> }
>>
>> - trace_xics_icp_check_ipi(server, ss->mfrr);
>> + trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
>
> The dt_id might be more useful here, given the changes that are going
> on in that area. But it's not worth holding up this series just for
> that.
Yes, I am seeing those changes, will adjust accordingly if that goes
first.
>
>>
>> - if (XISR(ss)) {
>> - ics_reject(xics->ics, XISR(ss));
>> + if (XISR(ss) && ss->xirr_owner) {
>> + ics_reject(ss->xirr_owner, XISR(ss));
>> }
>>
>> ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
>> ss->pending_priority = ss->mfrr;
>> + ss->xirr_owner = NULL;
>> qemu_irq_raise(ss->output);
>> }
>>
>> static void icp_resend(XICSState *xics, int server)
>> {
>> ICPState *ss = xics->ss + server;
>> + ICSState *ics;
>
> Possibly this should take icp instead of xics for consistency. Or
> else be renamed xics_resend(). But that can be a cleanup for another
> time.
>
>> if (ss->mfrr < CPPR(ss)) {
>> - icp_check_ipi(xics, server);
>> + icp_check_ipi(ss);
>> + }
>> + QLIST_FOREACH(ics, &xics->ics, list) {
>> + ics_resend(ics);
>> }
>> - ics_resend(xics->ics);
>> }
>>
>> void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
>> @@ -256,7 +264,10 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t
>> cppr)
>> ss->xirr &= ~XISR_MASK; /* Clear XISR */
>> ss->pending_priority = 0xff;
>> qemu_irq_lower(ss->output);
>> - ics_reject(xics->ics, old_xisr);
>> + if (ss->xirr_owner) {
>> + ics_reject(ss->xirr_owner, old_xisr);
>> + ss->xirr_owner = NULL;
>> + }
>> }
>> } else {
>> if (!XISR(ss)) {
>> @@ -271,7 +282,7 @@ void icp_set_mfrr(XICSState *xics, int server, uint8_t
>> mfrr)
>>
>> ss->mfrr = mfrr;
>> if (mfrr < CPPR(ss)) {
>> - icp_check_ipi(xics, server);
>> + icp_check_ipi(ss);
>> }
>> }
>>
>> @@ -282,6 +293,7 @@ uint32_t icp_accept(ICPState *ss)
>> qemu_irq_lower(ss->output);
>> ss->xirr = ss->pending_priority << 24;
>> ss->pending_priority = 0xff;
>> + ss->xirr_owner = NULL;
>>
>> trace_xics_icp_accept(xirr, ss->xirr);
>>
>> @@ -299,30 +311,40 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
>> void icp_eoi(XICSState *xics, int server, uint32_t xirr)
>> {
>> ICPState *ss = xics->ss + server;
>> + ICSState *ics;
>> + uint32_t irq;
>>
>> /* Send EOI -> ICS */
>> ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
>> trace_xics_icp_eoi(server, xirr, ss->xirr);
>> - ics_eoi(xics->ics, xirr & XISR_MASK);
>> + irq = xirr & XISR_MASK;
>> + QLIST_FOREACH(ics, &xics->ics, list) {
>> + if (ics_valid_irq(ics, irq)) {
>> + ics_eoi(ics, irq);
>
> You could slightly optimize this by adding a break; here, but again
> that can be a tweak for another time.
Sure.
>
>> + }
>> + }
>> if (!XISR(ss)) {
>> icp_resend(xics, server);
>> }
>> }
>>
>> -static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority)
>> +static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
>> {
>> + XICSState *xics = ics->xics;
>> ICPState *ss = xics->ss + server;
>
> It's kind of weird for an icp_() function to take an ics but not an
> icp. It might make more sense for it to take both, even though
> obviously it can be derived from just the ics. Again, won't hold up
> the series for this.
>
>> trace_xics_icp_irq(server, nr, priority);
>>
>> if ((priority >= CPPR(ss))
>> || (XISR(ss) && (ss->pending_priority <= priority))) {
>> - ics_reject(xics->ics, nr);
>> + ics_reject(ics, nr);
>> } else {
>> - if (XISR(ss)) {
>> - ics_reject(xics->ics, XISR(ss));
>> + if (XISR(ss) && ss->xirr_owner) {
>> + ics_reject(ss->xirr_owner, XISR(ss));
>> + ss->xirr_owner = NULL;
>> }
>> ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
>> + ss->xirr_owner = ics;
>> ss->pending_priority = priority;
>> trace_xics_icp_raise(ss->xirr, ss->pending_priority);
>> qemu_irq_raise(ss->output);
>> @@ -378,12 +400,42 @@ static void icp_reset(DeviceState *dev)
>> qemu_set_irq(icp->output, 0);
>> }
>>
>> +static int icp_post_load(ICPState *ss, int version_id)
>> +{
>> + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> + XICSState *xics = spapr->xics;
>> + uint32_t irq, i;
>> + static uint32_t server_no = 0;
>
> Eugh.. static locals are usually horrid.
>
>> + server_no++;
>> + irq = XISR(ss);
>> + if (!ss->cs || !irq)
>> + goto icpend;
>
> That's an awful use of a goto - inverting the if sense is clearer and
> no longer.
Sure.
>
>> +
>> + /* Restore the xirr_owner */
>> + ss->xirr_owner = xics_find_source(xics, irq);
>
> Do we actually need this? Have you confirmed that replaying the
> icp_resend()s won't already accomplish this?
Yes, i have had migration hang on the target. Other thing its only
affecting "kernel-irqchip=off"
>> + icpend:
>> + trace_xics_icp_post_load(server_no, ss->xirr, (uint64_t)ss->xirr_owner,
>> ss->pending_priority);
>> + if (xics->nr_servers != server_no)
>> + return 0;
>> +
>> + /* All the ICPs are processed, not restore all the state */
>> + for (i = 0; i < xics->nr_servers; i++) {
>> + icp_resend(xics, i);
>> + }
>> + server_no = 0;
>
> Eugh. This is really ugly. It really looks like we need some
> migration state on the overall xics, not just the component icps and
> icss. Dealing with backwards compatibility will be hairy, but I
> really hope we can do better than this hack.
Earlier icp_resend() was called from ics_post_load() path, and
icp_post_load gets called after ics_post_load, the xirr_owner is not
restored during ics_post_load.
If we can get icp_post_load called before ics_post_load, we would not
need the above code. Or as you suggested somehere above this.
>
>> + return 0;
>> +}
>> +
>> static void icp_class_init(ObjectClass *klass, void *data)
>> {
>> DeviceClass *dc = DEVICE_CLASS(klass);
>> + ICPStateClass *icpc = ICP_CLASS(klass);
>>
>> dc->reset = icp_reset;
>> dc->vmsd = &vmstate_icp_server;
>> + icpc->post_load = icp_post_load;
>> }
>>
>> static const TypeInfo icp_info = {
>> @@ -405,8 +457,7 @@ static void resend_msi(ICSState *ics, int srcno)
>> if (irq->status & XICS_STATUS_REJECTED) {
>> irq->status &= ~XICS_STATUS_REJECTED;
>> if (irq->priority != 0xff) {
>> - icp_irq(ics->xics, irq->server, srcno + ics->offset,
>> - irq->priority);
>> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>> }
>> }
>> }
>> @@ -419,7 +470,7 @@ static void resend_lsi(ICSState *ics, int srcno)
>> && (irq->status & XICS_STATUS_ASSERTED)
>> && !(irq->status & XICS_STATUS_SENT)) {
>> irq->status |= XICS_STATUS_SENT;
>> - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
>> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>> }
>> }
>>
>> @@ -434,7 +485,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int
>> val)
>> irq->status |= XICS_STATUS_MASKED_PENDING;
>> trace_xics_masked_pending();
>> } else {
>> - icp_irq(ics->xics, irq->server, srcno + ics->offset,
>> irq->priority);
>> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>> }
>> }
>> }
>> @@ -473,7 +524,7 @@ static void write_xive_msi(ICSState *ics, int srcno)
>> }
>>
>> irq->status &= ~XICS_STATUS_MASKED_PENDING;
>> - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
>> + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>> }
>>
>> static void write_xive_lsi(ICSState *ics, int srcno)
>> @@ -505,8 +556,11 @@ static void ics_reject(ICSState *ics, int nr)
>> ICSIRQState *irq = ics->irqs + nr - ics->offset;
>>
>> trace_xics_ics_reject(nr, nr - ics->offset);
>> - irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI
>> */
>> - irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
>> + if (irq->flags & XICS_FLAGS_IRQ_MSI) {
>> + irq->status |= XICS_STATUS_REJECTED;
>> + } else {
>> + irq->status &= ~XICS_STATUS_SENT;
>> + }
>
> This change appears unrelated to the rest. If not that needs an explanation.
I was seeing inconsistent status because of this in the trace logs, like
for LSI status as 0x7, i.e. XICS_STATUS_ASSERTED, XICS_STATUS_SENT and
XICS_STATUS_REJECTED all set, which did not make sense. So the REJECTED
would have been set in earlier interrupt cycle, and then asserted and
sent in this current one.
Regards
Nikunj