qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] pseries: Support for in-kernel XICS interrupt controller


From: Alexander Graf
Subject: Re: [Qemu-devel] [PATCH] pseries: Support for in-kernel XICS interrupt controller
Date: Fri, 21 Jun 2013 01:10:36 +0200

On 05.06.2013, at 09:39, Alexey Kardashevskiy wrote:

> From: David Gibson <address@hidden>
> 
> Recent (host) kernels support emulating the PAPR defined "XICS" interrupt
> controller system within KVM.  This patch allows qemu to initialize and
> configure the in-kernel XICS, and keep its state in sync with qemu's XICS
> state as necessary.
> 
> This should give considerable performance improvements.  e.g. on a simple
> IPI ping-pong test between hardware threads, using qemu XICS gives us
> around 5,000 irqs/second, whereas the in-kernel XICS gives us around
> 70,000 irqs/s on the same hardware configuration.
> 
> [Mike Qiu <address@hidden>: fixed mistype which caused ics_set_kvm_state() to 
> fail]
> Signed-off-by: David Gibson <address@hidden>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> 
> This depends on the "pseries: savevm support for XICS interrupt controller"
> patch posted earlier.
> 
> ---
> hw/ppc/spapr.c        |    4 +-
> hw/ppc/xics.c         |  333 ++++++++++++++++++++++++++++++++++++++++++++++++-
> include/hw/ppc/xics.h |    8 +-
> 3 files changed, 336 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 71da11b..04e0eae 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1136,8 +1136,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>         }
>         env = &cpu->env;
> 
> -        xics_cpu_setup(spapr->icp, cpu);
> -
>         /* Set time-base frequency to 512 MHz */
>         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> 
> @@ -1151,6 +1149,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>             kvmppc_set_papr(cpu);
>         }
> 
> +        xics_cpu_setup(spapr->icp, cpu);
> +
>         qemu_register_reset(spapr_cpu_reset, cpu);
>     }
> 
> diff --git a/hw/ppc/xics.c b/hw/ppc/xics.c
> index 02e44a0..b83f19f 100644
> --- a/hw/ppc/xics.c
> +++ b/hw/ppc/xics.c
> @@ -29,12 +29,19 @@
> #include "trace.h"
> #include "hw/ppc/spapr.h"
> #include "hw/ppc/xics.h"
> +#include "kvm_ppc.h"
> +#include "sysemu/kvm.h"
> +#include "config.h"
> +#include "qemu/config-file.h"
> +
> +#include <sys/ioctl.h>

Huh? This breaks compilation on non-Linux.

> 
> /*
>  * ICP: Presentation layer
>  */
> 
> struct icp_server_state {
> +    CPUState *cs;

How did you get around this earlier without the CPUState pointer?

>     uint32_t xirr;
>     uint8_t pending_priority;
>     uint8_t mfrr;
> @@ -53,6 +60,9 @@ struct icp_state {
>     uint32_t nr_servers;
>     struct icp_server_state *ss;
>     struct ics_state *ics;
> +    uint32_t set_xive_token, get_xive_token,
> +        int_off_token, int_on_token;

Separate declaration lines please.

> +    int kernel_xics_fd;
> };
> 
> static void ics_reject(struct ics_state *ics, int nr);
> @@ -168,6 +178,66 @@ static void icp_irq(struct icp_state *icp, int server, 
> int nr, uint8_t priority)
>     }
> }
> 
> +static void icp_get_kvm_state(struct icp_server_state *ss)
> +{
> +#ifdef CONFIG_KVM
> +    uint64_t state;
> +    struct kvm_one_reg reg = {
> +        .id = KVM_REG_PPC_ICP_STATE,
> +        .addr = (uintptr_t)&state,
> +    };
> +    int ret;
> +
> +    if (!ss->cs) {
> +        return; /* kernel irqchip not in use */
> +    }
> +
> +    ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
> +    if (ret != 0) {
> +        fprintf(stderr, "Unable to retrieve KVM interrupt controller state"
> +                " for CPU %d: %s\n", ss->cs->cpu_index, strerror(errno));
> +        exit(1);
> +    }
> +
> +    ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
> +    ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
> +        & KVM_REG_PPC_ICP_MFRR_MASK;
> +    ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
> +        & KVM_REG_PPC_ICP_PPRI_MASK;
> +#endif /* CONFIG_KVM */

This needs to be encapsulated in a KVM helper function that gets a dummy 
definition for non-KVM builds. We've been through this multiple times now in 
other areas.

> +}
> +
> +static int icp_set_kvm_state(struct icp_server_state *ss)
> +{
> +#ifdef CONFIG_KVM
> +    uint64_t state;
> +    struct kvm_one_reg reg = {
> +        .id = KVM_REG_PPC_ICP_STATE,
> +        .addr = (uintptr_t)&state,
> +    };
> +    int ret;
> +
> +    if (!ss->cs) {
> +        return 0; /* kernel irqchip not in use */
> +    }
> +
> +    state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
> +        | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
> +        | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
> +
> +    ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
> +    if (ret != 0) {
> +        fprintf(stderr, "Unable to restore KVM interrupt controller state 
> (0x%"
> +                PRIx64 ") for CPU %d: %s\n", state, ss->cs->cpu_index,
> +                strerror(errno));
> +        exit(1);
> +        return ret;
> +    }
> +#endif /* CONFIG_KVM */
> +
> +    return 0;
> +}
> +
> /*
>  * ICS: Source layer
>  */
> @@ -336,6 +406,107 @@ static void ics_eoi(struct ics_state *ics, int nr)
>     }
> }
> 
> +static void ics_get_kvm_state(struct ics_state *ics)
> +{
> +#ifdef CONFIG_KVM
> +    uint64_t state;
> +    struct kvm_device_attr attr = {
> +        .flags = 0,
> +        .group = KVM_DEV_XICS_GRP_SOURCES,
> +        .addr = (uint64_t)(uintptr_t)&state,
> +    };
> +    int i;
> +
> +    if (ics->icp->kernel_xics_fd == -1) {
> +        return; /* kernel irqchip not in use */
> +    }
> +
> +    for (i = 0; i < ics->nr_irqs; i++) {
> +        struct ics_irq_state *irq = &ics->irqs[i];
> +        int ret;
> +
> +        attr.attr = i + ics->offset;
> +
> +        ret = ioctl(ics->icp->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
> +        if (ret != 0) {
> +            fprintf(stderr, "Unable to retrieve KVM interrupt controller 
> state"
> +                    " for IRQ %d: %s\n", i + ics->offset, strerror(errno));
> +            exit(1);
> +        }
> +
> +        irq->server = state & KVM_XICS_DESTINATION_MASK;
> +        irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT)
> +            & KVM_XICS_PRIORITY_MASK;
> +        if (state & KVM_XICS_MASKED) {

This needs an explanation in a comment.

> +            irq->priority = 0xff;
> +        } else {
> +            irq->priority = irq->saved_priority;
> +        }
> +
> +        if (state & KVM_XICS_PENDING) {
> +            if (state & KVM_XICS_LEVEL_SENSITIVE) {
> +                irq->status |= XICS_STATUS_ASSERTED;
> +            } else {

This needs an explanation in a comment. Why is an EDGE pending interrupt rejected?

> +                irq->status |= XICS_STATUS_MASKED_PENDING
> +                    | XICS_STATUS_REJECTED;
> +            }
> +        }
> +    }
> +#endif /* CONFIG_KVM */
> +}
> +
> +static int ics_set_kvm_state(struct ics_state *ics)
> +{
> +#ifdef CONFIG_KVM
> +    uint64_t state;
> +    struct kvm_device_attr attr = {
> +        .flags = 0,
> +        .group = KVM_DEV_XICS_GRP_SOURCES,
> +        .addr = (uint64_t)(uintptr_t)&state,
> +    };
> +    int i;
> +
> +    if (ics->icp->kernel_xics_fd == -1) {
> +        return 0; /* kernel irqchip not in use */
> +    }
> +
> +    for (i = 0; i < ics->nr_irqs; i++) {
> +        struct ics_irq_state *irq = &ics->irqs[i];
> +        int ret;
> +
> +        attr.attr = i + ics->offset;
> +
> +        state = irq->server;
> +        state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
> +            << KVM_XICS_PRIORITY_SHIFT;
> +        if (irq->priority != irq->saved_priority) {
> +            assert(irq->priority == 0xff);
> +            state |= KVM_XICS_MASKED;
> +        }
> +
> +        if (ics->islsi[i]) {
> +            state |= KVM_XICS_LEVEL_SENSITIVE;
> +            if (irq->status & XICS_STATUS_ASSERTED) {
> +                state |= KVM_XICS_PENDING;
> +            }
> +        } else {
> +            if (irq->status & XICS_STATUS_MASKED_PENDING) {
> +                state |= KVM_XICS_PENDING;
> +            }
> +        }
> +
> +        ret = ioctl(ics->icp->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
> +        if (ret != 0) {
> +            fprintf(stderr, "Unable to restore KVM interrupt controller 
> state"
> +                    " for IRQs %d: %s\n", i + ics->offset, strerror(errno));
> +            return ret;
> +        }
> +    }
> +#endif /* CONFIG_KVM */
> +
> +    return 0;
> +}
> +
> /*
>  * Exported functions
>  */
> @@ -514,6 +685,10 @@ static void xics_reset(void *opaque)
>         icp->ss[i].mfrr = 0xff;
>         /* Make all outputs are deasserted */
>         qemu_set_irq(icp->ss[i].output, 0);
> +
> +        if (icp->ss[i].cs) {
> +            icp_set_kvm_state(&icp->ss[i]);
> +        }
>     }
> 
>     memset(ics->irqs, 0, sizeof(struct ics_irq_state) * ics->nr_irqs);
> @@ -521,6 +696,26 @@ static void xics_reset(void *opaque)
>         ics->irqs[i].priority = 0xff;
>         ics->irqs[i].saved_priority = 0xff;
>     }
> +    ics_set_kvm_state(ics);
> +}
> +
> +static void icp_pre_save(void *opaque)
> +{
> +    struct icp_server_state *ss = opaque;
> +    icp_get_kvm_state(ss);
> +}
> +
> +static int icp_post_load(void *opaque, int version_id)
> +{
> +    struct icp_server_state *ss = opaque;
> +
> +    return icp_set_kvm_state(ss);
> +}
> +
> +static void ics_pre_save(void *opaque)
> +{
> +    struct ics_state *ics = opaque;
> +    ics_get_kvm_state(ics);
> }
> 
> static int ics_post_load(void *opaque, int version_id)
> @@ -528,6 +723,10 @@ static int ics_post_load(void *opaque, int version_id)
>     int i;
>     struct ics_state *ics = opaque;
> 
> +    if (ics->icp->kernel_xics_fd != -1) {
> +        return ics_set_kvm_state(ics);
> +    }
> +
>     for (i = 0; i < ics->icp->nr_servers; i++) {
>         icp_resend(ics->icp, i);
>     }
> @@ -535,11 +734,14 @@ static int ics_post_load(void *opaque, int version_id)
>     return 0;
> }
> 
> +
> static const VMStateDescription vmstate_icp_server = {
>     .name = "icp/server",
>     .version_id = 1,
>     .minimum_version_id = 1,
>     .minimum_version_id_old = 1,
> +    .pre_save = icp_pre_save,
> +    .post_load = icp_post_load,
>     .fields      = (VMStateField []) {
>         /* Sanity check */
>         VMSTATE_UINT32(xirr, struct icp_server_state),
> @@ -568,6 +770,7 @@ static const VMStateDescription vmstate_ics = {
>     .version_id = 1,
>     .minimum_version_id = 1,
>     .minimum_version_id_old = 1,
> +    .pre_save = ics_pre_save,
>     .post_load = ics_post_load,
>     .fields      = (VMStateField []) {
>         /* Sanity check */
> @@ -586,6 +789,28 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU 
> *cpu)
> 
>     assert(cs->cpu_index < icp->nr_servers);
> 
> +    if (icp->kernel_xics_fd != -1) {
> +#ifdef CONFIG_KVM
> +        int ret;
> +        struct kvm_enable_cap xics_enable_cap = {
> +            .cap = KVM_CAP_IRQ_XICS,
> +            .flags = 0,
> +            .args = {icp->kernel_xics_fd, cs->cpu_index, 0, 0},
> +        };
> +
> +        ss->cs = cs;
> +
> +        ret = kvm_vcpu_ioctl(ss->cs, KVM_ENABLE_CAP, &xics_enable_cap);
> +        if (ret < 0) {
> +            fprintf(stderr, "Unable to connect CPU%d to kernel XICS: %s\n",
> +                    cs->cpu_index, strerror(errno));
> +            exit(1);
> +        }
> +#else
> +        abort();
> +#endif
> +    }
> +
>     switch (PPC_INPUT(env)) {
>     case PPC_FLAGS_INPUT_POWER7:
>         ss->output = env->irq_inputs[POWER7_INPUT_INT];
> @@ -604,6 +829,97 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU 
> *cpu)
>     vmstate_register(NULL, cs->cpu_index, &vmstate_icp_server, ss);
> }
> 
> +#ifdef CONFIG_KVM
> +static void ics_set_irq_kvm(void *opaque, int srcno, int val)
> +{
> +    struct ics_state *ics = opaque;
> +    struct kvm_irq_level args;
> +    int rc;
> +
> +    args.irq = srcno + ics->offset;
> +    if (!ics->islsi[srcno]) {
> +        if (!val) {
> +            return;
> +        }
> +        args.level = KVM_INTERRUPT_SET;
> +    } else {
> +        args.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
> +    }
> +    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
> +    if (rc < 0) {
> +        perror("kvm_irq_line");
> +    }
> +}
> +
> +static int xics_kernel_init(struct icp_state *icp)
> +{
> +    QemuOptsList *list = qemu_find_opts("machine");
> +    struct ics_state *ics = icp->ics;
> +    int rc;
> +    struct kvm_create_device xics_create_device = {
> +        .type = KVM_DEV_TYPE_XICS,
> +        .flags = 0,
> +    };
> +
> +    if (!kvm_enabled()) {
> +        return -ENODEV;
> +    }
> +
> +    if (QTAILQ_EMPTY(&list->head) ||
> +        !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
> +                           "kernel_irqchip", true) ||
> +        !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
> +        return -ENODEV;
> +    }
> +
> +    rc = kvmppc_define_rtas_token(icp->set_xive_token, "ibm,set-xive");
> +    if (rc < 0) {
> +        perror("kvmppc_define_rtas_token: ibm,set-xive");
> +        goto fail;
> +    }
> +
> +    rc = kvmppc_define_rtas_token(icp->get_xive_token, "ibm,get-xive");
> +    if (rc < 0) {
> +        perror("kvmppc_define_rtas_token: ibm,get-xive");
> +        goto fail;
> +    }
> +
> +    rc = kvmppc_define_rtas_token(icp->int_on_token, "ibm,int-on");
> +    if (rc < 0) {
> +        perror("kvmppc_define_rtas_token: ibm,int-on");
> +        goto fail;
> +    }
> +
> +    rc = kvmppc_define_rtas_token(icp->int_off_token, "ibm,int-off");
> +    if (rc < 0) {
> +        perror("kvmppc_define_rtas_token: ibm,int-off");
> +        goto fail;
> +    }
> +
> +    /* Create the kernel ICP */
> +    rc = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &xics_create_device);
> +    if (rc < 0) {
> +        fprintf(stderr, "Error on KVM_CREATE_DEVICE for XICS: %s",
> +                strerror(errno));
> +        goto fail;
> +    }
> +
> +    icp->kernel_xics_fd = xics_create_device.fd;
> +    ics->qirqs = qemu_allocate_irqs(ics_set_irq_kvm, ics, ics->nr_irqs);
> +
> +    return 0;
> +
> +fail:
> +    kvmppc_define_rtas_token(0, "ibm,set-xive");
> +    kvmppc_define_rtas_token(0, "ibm,get-xive");
> +    kvmppc_define_rtas_token(0, "ibm,int-on");
> +    kvmppc_define_rtas_token(0, "ibm,int-off");
> +    return rc;
> +}
> +#else
> +static int xics_kernel_init(struct icp_state *icp) { return -1; }
> +#endif
> +
> struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> {
>     struct icp_state *icp;
> @@ -611,6 +927,7 @@ struct icp_state *xics_system_init(int nr_servers, int 
> nr_irqs)
> 
>     icp = g_malloc0(sizeof(*icp));
>     icp->nr_servers = nr_servers;
> +    icp->kernel_xics_fd = -1;
>     icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
> 
>     ics = g_malloc0(sizeof(*ics));
> @@ -622,17 +939,21 @@ struct icp_state *xics_system_init(int nr_servers, int 
> nr_irqs)
>     icp->ics = ics;
>     ics->icp = icp;
> 
> -    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
> -
> +    /* We don't actually use these functions with in-kernel XICS, but
> +     * registering them always doesn't hurt */
>     spapr_register_hypercall(H_CPPR, h_cppr);
>     spapr_register_hypercall(H_IPI, h_ipi);
>     spapr_register_hypercall(H_XIRR, h_xirr);
>     spapr_register_hypercall(H_EOI, h_eoi);
> 
> -    spapr_rtas_register("ibm,set-xive", rtas_set_xive);
> -    spapr_rtas_register("ibm,get-xive", rtas_get_xive);
> -    spapr_rtas_register("ibm,int-off", rtas_int_off);
> -    spapr_rtas_register("ibm,int-on", rtas_int_on);
> +    icp->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_set_xive);
> +    icp->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_get_xive);
> +    icp->int_off_token = spapr_rtas_register("ibm,int-off", rtas_int_off);
> +    icp->int_on_token = spapr_rtas_register("ibm,int-on", rtas_int_on);
> +
> +    if (xics_kernel_init(icp) != 0) {
> +        ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
> +    }

Looking at all this, it seems as if logic similar to hw/intc/openpic_kvm.c 
makes more sense. Just spawn a separate device for an in-kernel XICS 
controller and share the few definitions you can in a common header file.


Alex

> 
>     qemu_register_reset(xics_reset, icp);
> 
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 6bce042..d52215d 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -28,12 +28,18 @@
> #define __XICS_H__
> 
> #define XICS_IPI        0x2
> -#define XICS_IRQ_BASE   0x10
> +#define XICS_BUID       0x1
> +#define XICS_IRQ_BASE   (XICS_BUID << 12)
> 
> struct icp_state;
> 
> qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
> void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
> +/*
> + * We currently only support one BUID which is our interrupt base
> + * (the kernel implementation supports more but we don't exploit
> + *  that yet)
> + */
> 
> struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
> void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
> -- 
> 1.7.10.4
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]