qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] Re: [PATCH v7 2/6] pcie/aer: helper functions for pcie aer


From: Michael S. Tsirkin
Subject: [Qemu-devel] Re: [PATCH v7 2/6] pcie/aer: helper functions for pcie aer capability
Date: Tue, 2 Nov 2010 14:57:12 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

On Tue, Nov 02, 2010 at 06:32:48PM +0900, Isaku Yamahata wrote:
> This patch implements helper functions for pcie aer capability
> which will be used later.
> 
> Signed-off-by: Isaku Yamahata <address@hidden>
> ---
> Changes v6 -> v7:
> - make error log buffer non-ringed buffer
> - refactor pcie_aer_error_inject()
> - clean up of pcie_aer_write_config()
> - make pcie_aer_inject_error() return error
> - remove AERMsgResult
> - remove PCIEAERSeverity
> - reduce forward declarations
> - style clean ups
> - remove paren for sizeof.
> 
> Chnages v5 -> v6:
> - cleaned up pcie_aer_write_config().
> - enum definition.
> 
> Changes v4 -> v5:
> - use pci_xxx_test_and_xxx_mask()
> - rewrote PCIDevice::written bits.
> - eliminated pcie_aer_notify()
> - introduced PCIExpressDevice::aer_intx
> 
> Changes v3 -> v4:
> - various naming fixes.
> - use pci bit operation helper function
> - eliminate errmsg function pointer
> - replace pci_shift_xxx() with PCIDevice::written
> - uncorrect error status register.
> - dropped pcie_aer_cap()
> 
> Changes v2 -> v3:
> - split out from pcie.[ch] to pcie_aer.[ch] to make the files sorter.
> - embeded PCIExpressDevice into PCIDevice.
> - CodingStyle fix
> ---
>  Makefile.objs |    2 +-
>  hw/pcie.h     |   14 +
>  hw/pcie_aer.c |  825 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/pcie_aer.h |   94 +++++++
>  qemu-common.h |    3 +
>  5 files changed, 937 insertions(+), 1 deletions(-)
>  create mode 100644 hw/pcie_aer.c
>  create mode 100644 hw/pcie_aer.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index faf485e..c5919af 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -207,7 +207,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o
>  # PCI watchdog devices
>  hw-obj-y += wdt_i6300esb.o
>  
> -hw-obj-y += pcie.o pcie_port.o
> +hw-obj-y += pcie.o pcie_aer.o pcie_port.o
>  hw-obj-y += msix.o msi.o
>  
>  # PCI network cards
> diff --git a/hw/pcie.h b/hw/pcie.h
> index 8708504..7baa813 100644
> --- a/hw/pcie.h
> +++ b/hw/pcie.h
> @@ -24,6 +24,7 @@
>  #include "hw.h"
>  #include "pci_regs.h"
>  #include "pcie_regs.h"
> +#include "pcie_aer.h"
>  
>  typedef enum {
>      /* for attention and power indicator */
> @@ -79,6 +80,19 @@ struct PCIExpressDevice {
>                           Software Notification of Hot-Plug Events, an 
> interrupt
>                           is sent whenever the logical and of these conditions
>                           transitions from false to true. */
> +
> +    /* AER */
> +    uint16_t aer_cap;
> +    PCIEAERLog aer_log;
> +    unsigned int aer_intx;      /* INTx for error reporting
> +                                 * default is 0 = INTA#
> +                                 * If the chip wants to use other interrupt
> +                                 * line, initialize this member with the
> +                                 * desired number.
> +                                 * If the chip dynamically changes this 
> member,
> +                                 * also initialize it when loaded as
> +                                 * appropreately.
> +                                 */
>  };
>  
>  /* PCI express capability helper functions */
> diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c
> new file mode 100644
> index 0000000..84c3457
> --- /dev/null
> +++ b/hw/pcie_aer.c
> @@ -0,0 +1,825 @@
> +/*
> + * pcie_aer.c
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "sysemu.h"
> +#include "pci_bridge.h"
> +#include "pcie.h"
> +#include "msix.h"
> +#include "msi.h"
> +#include "pci_internals.h"
> +#include "pcie_regs.h"
> +
> +//#define DEBUG_PCIE
> +#ifdef DEBUG_PCIE
> +# define PCIE_DPRINTF(fmt, ...)                                         \
> +    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
> +#else
> +# define PCIE_DPRINTF(fmt, ...) do {} while (0)
> +#endif
> +#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
> +    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
> +
> +static void pcie_aer_clear_error(PCIDevice *dev);
> +static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
> +

so what exactly is the order of calls that makes
removing the forward declarations impractical?
Is there a recursive call? If yes I'd like to
see it documented much better.

> +/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
> +static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
> +{
> +    switch (status) {
> +    case PCI_ERR_UNC_INTN:
> +    case PCI_ERR_UNC_DLP:
> +    case PCI_ERR_UNC_SDN:
> +    case PCI_ERR_UNC_RX_OVER:
> +    case PCI_ERR_UNC_FCP:
> +    case PCI_ERR_UNC_MALF_TLP:
> +        return PCI_ERR_ROOT_CMD_FATAL_EN;
> +    case PCI_ERR_UNC_POISON_TLP:
> +    case PCI_ERR_UNC_ECRC:
> +    case PCI_ERR_UNC_UNSUP:
> +    case PCI_ERR_UNC_COMP_TIME:
> +    case PCI_ERR_UNC_COMP_ABORT:
> +    case PCI_ERR_UNC_UNX_COMP:
> +    case PCI_ERR_UNC_ACSV:
> +    case PCI_ERR_UNC_MCBTLP:
> +    case PCI_ERR_UNC_ATOP_EBLOCKED:
> +    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
> +        return PCI_ERR_ROOT_CMD_NONFATAL_EN;
> +    default:
> +        abort();
> +        break;
> +    }
> +    return PCI_ERR_ROOT_CMD_FATAL_EN;
> +}
> +
> +static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
> +{
> +    if (aer_log->log_num == aer_log->log_max) {
> +        return -1;
> +    }
> +    memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
> +    aer_log->log_num++;
> +    return 0;
> +}
> +
> +static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
> +{
> +    assert(aer_log->log_num);
> +    *err = aer_log->log[0];
> +    aer_log->log_num--;
> +    memmove(&aer_log->log[0], &aer_log->log[1],
> +            aer_log->log_num * sizeof *err);
> +}
> +
> +static void aer_log_clear_all_err(PCIEAERLog *aer_log)
> +{
> +    aer_log->log_num = 0;
> +}
> +
> +void pcie_aer_init(PCIDevice *dev, uint16_t offset)
> +{
> +    PCIExpressDevice *exp;
> +
> +    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> +    pci_word_test_and_set_mask(dev->w1cmask + PCI_STATUS,
> +                               PCI_STATUS_SIG_SYSTEM_ERROR);
> +

I would say we should just set these for all devices.
But if we do my concern is that guest might write 1 to this register,
then we migrate to an old guest and that one can not clear this bit.
Thoughts? Let's add a flag so old machine types can disable this?


Also - what about other bits in the status register?

> +    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
> +                        offset, PCI_ERR_SIZEOF);
> +    exp = &dev->exp;
> +    exp->aer_cap = offset;
> +    if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
> +        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
> +    }
> +    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_MAX) {
> +        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_MAX;
> +    }

So someone should set log_max beforehand? And an illegal value is
rounded down?  How is this API supposed to be used?

> +    dev->exp.aer_log.log = qemu_mallocz(sizeof dev->exp.aer_log.log[0] *
> +                                        dev->exp.aer_log.log_max);
> +
> +    /* On reset PCI_ERR_CAP_MHRE is disabled
> +     * PCI_ERR_CAP_MHRE is RWS so that reset doesn't affect related
> +     * registers
> +     */

I am not sure I agree. There are different kinds of reset. Full system
reset certainly clears this register.  When we do add a concept of a hot
reset (e.g. with FLR), we should also add stickymask in the device.  But
for now let's keep it simple.

> +    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
> +                 PCI_ERR_UNC_SUPPORTED);
> +
> +    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
> +                 PCI_ERR_UNC_SEVERITY_DEFAULT);
> +    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
> +                 PCI_ERR_UNC_SUPPORTED);
> +
> +    pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
> +                               PCI_ERR_COR_STATUS);
> +
> +    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
> +                 PCI_ERR_COR_MASK_DEFAULT);
> +    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
> +                 PCI_ERR_COR_SUPPORTED);
> +
> +    /* capabilities and control. multiple header logging is supported */
> +    if (dev->exp.aer_log.log_max > 0) {
> +        pci_set_long(dev->config + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
> +                     PCI_ERR_CAP_MHRC);
> +        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
> +                     PCI_ERR_CAP_MHRE);
> +    } else {
> +        pci_set_long(dev->config + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
> +        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
> +    }
> +
> +    switch (pcie_cap_get_type(dev)) {
> +    case PCI_EXP_TYPE_ROOT_PORT:
> +        /* this case will be set by pcie_aer_root_init() */
> +        /* fallthrough */
> +    case PCI_EXP_TYPE_DOWNSTREAM:
> +    case PCI_EXP_TYPE_UPSTREAM:
> +        pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
> +                                   PCI_BRIDGE_CTL_SERR);
> +        pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
> +                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
> +        break;
> +    default:
> +        /* nothing */
> +        break;
> +    }
> +}
> +
> +void pcie_aer_exit(PCIDevice *dev)
> +{
> +    qemu_free(dev->exp.aer_log.log);
> +}
> +
> +static void pcie_aer_rebuild_uncor_status(PCIDevice *dev)

rebuild -> update

> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    PCIEAERLog *aer_log = &dev->exp.aer_log;
> +
> +    uint16_t i;
> +    for (i = 0; i < aer_log->log_num; i++) {
> +        pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
> +                                   dev->exp.aer_log.log[i].status);
> +    }
> +}
> +
> +void pcie_aer_write_config(PCIDevice *dev,
> +                           uint32_t addr, uint32_t val, int len)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
> +    uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
> +
> +    /* uncorrectable error */
> +    if (!(uncorsta & first_error)) {
> +        /* the bit that corresponds to the first error is cleared */
> +        pcie_aer_clear_error(dev);
> +    } else if (errcap & PCI_ERR_CAP_MHRE) {
> +        /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
> +         * nothing should happen. So we have to revert the modification to
> +         * the register.
> +         */
> +        pcie_aer_rebuild_uncor_status(dev);
> +    } else {
> +        /* capability & control
> +         * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
> +         */
> +        aer_log_clear_all_err(&dev->exp.aer_log);
> +    }
> +}
> +
> +/*
> + * return value:
> + * true: error message is sent up
> + * false: error message is masked
> + */

OK I figured the return value out but what does this do?
Specifically what does alldev stand for?

> +static bool
> +pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND);
> +    bool transmit1 =
> +        pcie_aer_msg_is_uncor(msg) && (cmd & PCI_COMMAND_SERR);
> +    uint16_t devctl = pci_get_word(dev->config +
> +                                   dev->exp.exp_cap + PCI_EXP_DEVCTL);
> +    bool transmit2 = msg->severity & devctl;
> +    PCIDevice *parent_port;
> +
> +    if (transmit1) {
> +        if (pcie_aer_msg_is_uncor(msg)) {
> +            /* Signaled System Error */
> +            pci_word_test_and_set_mask(dev->config + PCI_STATUS,
> +                                       PCI_STATUS_SIG_SYSTEM_ERROR);
> +        }
> +    }
> +
> +    if (!(transmit1 || transmit2)) {
> +        return false;
> +    } 

transmit1 and transmit2 just seem to confuse. And transmit 1 already
includes pcie_aer_msg_is_uncor condition.
How about open-coding it all:

    if (pcie_aer_msg_is_uncor(msg) &&
        (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) {
            /* Signaled System Error */
            pci_word_test_and_set_mask(dev->config + PCI_STATUS,
                                       PCI_STATUS_SIG_SYSTEM_ERROR);
    } else if (!(msg->severity & pci_get_word(dev->config +
                                   dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
        return false;
    }

Took me a while to figure this out.  Maybe add this bit from the spec:

        When Set, this bit enables reporting of Non-fatal and Fatal
        errors detected by the Function to the Root Complex. Note that
        errors are reported if enabled either through this bit or through
        the PCI Express specific bits in the Device Control register (see
        Section 7.8.4).


> +
> +    /* send up error message */
> +    if (pci_is_express(dev) &&
> +        pcie_cap_get_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> +        /* Root port notify system itself,
> +           or send the error message to root complex event collector. */
> +        /*
> +         * if root port is associated to event collector, set
> +         * parent_port = root complex event collector
> +         * For now root complex event collector isn't supported.
> +         */
> +        parent_port = NULL;
> +    } else {
> +        parent_port = pci_bridge_get_device(dev->bus);
> +    }
> +    if (parent_port) {
> +        if (!pci_is_express(parent_port)) {
> +            /* just ignore it */
> +            return false;
> +        }
> +        pcie_aer_msg(parent_port, msg);
> +    }
> +    return true;
> +}
> +
> +/*
> + * return value:
> + * true: error message is sent up
> + * false: error message is masked
> + */
> +static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
> +
> +    if (pcie_aer_msg_is_uncor(msg)) {
> +        /* Received System Error */
> +        pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
> +                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
> +    }
> +
> +    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
> +        return false;
> +    }
> +    return true;
> +}
> +
> +void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
> +    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
> +                                 PCI_ERR_ROOT_IRQ);
> +    pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
> +                               ((uint32_t)vector) << PCI_ERR_ROOT_IRQ_SHIFT);

We don't really need the cast, do we?

> +}
> +
> +static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
> +    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
> +}
> +
> +/*
> + * return value:
> + * true: error message is sent up
> + * false: error message is masked
> + */
> +static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    bool msg_sent;
> +    uint16_t cmd;
> +    uint8_t *aer_cap;
> +    uint32_t root_cmd;
> +    uint32_t root_sta;

root_sta -> root_status

> +    bool msi_trigger;
> +
> +    msg_sent = false;
> +    cmd = pci_get_word(dev->config + PCI_COMMAND);
> +    aer_cap = dev->config + dev->exp.aer_cap;
> +    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
> +    root_sta = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
> +    msi_trigger = false;

Seems unused?

> +
> +    if (cmd & PCI_COMMAND_SERR) {
> +        /* System Error. Platform Specific */
> +        /* msg_sent = true; */

What goes on here?

> +    }
> +
> +    /* Errro Message Received: Root Error Status register */
> +    switch (msg->severity) {
> +    case PCI_ERR_ROOT_CMD_COR_EN:
> +        if (root_sta & PCI_ERR_ROOT_COR_RCV) {
> +            root_sta |= PCI_ERR_ROOT_MULTI_COR_RCV;
> +        } else {
> +            if (root_cmd & PCI_ERR_ROOT_CMD_COR_EN) {
> +                msi_trigger = true;
> +            }
> +            pci_set_word(aer_cap + PCI_ERR_ROOT_COR_SRC, msg->source_id);
> +        }
> +        root_sta |= PCI_ERR_ROOT_COR_RCV;
> +        break;
> +    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
> +        if (!(root_sta & PCI_ERR_ROOT_NONFATAL_RCV) &&
> +            root_cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) {
> +            msi_trigger = true;
> +        }
> +        root_sta |= PCI_ERR_ROOT_NONFATAL_RCV;
> +        break;
> +    case PCI_ERR_ROOT_CMD_FATAL_EN:
> +        if (!(root_sta & PCI_ERR_ROOT_FATAL_RCV) &&
> +            root_cmd & PCI_ERR_ROOT_CMD_FATAL_EN) {
> +            msi_trigger = true;
> +        }
> +        if (!(root_sta & PCI_ERR_ROOT_UNCOR_RCV)) {
> +            root_sta |= PCI_ERR_ROOT_FIRST_FATAL;
> +        }
> +        root_sta |= PCI_ERR_ROOT_FATAL_RCV;
> +        break;
> +    default:
> +        abort();
> +        break;
> +    }
> +    if (pcie_aer_msg_is_uncor(msg)) {
> +        if (root_sta & PCI_ERR_ROOT_UNCOR_RCV) {
> +            root_sta |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
> +        } else {
> +            pci_set_word(aer_cap + PCI_ERR_ROOT_SRC, msg->source_id);
> +        }
> +        root_sta |= PCI_ERR_ROOT_UNCOR_RCV;
> +    }
> +    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_sta);
> +
> +    if (root_cmd & msg->severity) {
> +        /* 6.2.4.1.2 Interrupt Generation */
> +        if (pci_msi_enabled(dev)) {
> +            pci_msi_notify(dev, pcie_aer_root_get_vector(dev));
> +        } else {
> +            qemu_set_irq(dev->irq[dev->exp.aer_intx], 1);
> +        }
> +        msg_sent = true;
> +    }
> +    return msg_sent;
> +}
> +
> +static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    uint8_t type;
> +    bool msg_sent;
> +
> +    assert(pci_is_express(dev));
> +
> +    type = pcie_cap_get_type(dev);
> +    if (type == PCI_EXP_TYPE_ROOT_PORT ||
> +        type == PCI_EXP_TYPE_UPSTREAM ||
> +        type == PCI_EXP_TYPE_DOWNSTREAM) {
> +        msg_sent = pcie_aer_msg_vbridge(dev, msg);
> +        if (!msg_sent) {
> +            return;
> +        }
> +    }
> +    msg_sent= pcie_aer_msg_alldev(dev, msg);

space before =

> +    if (type == PCI_EXP_TYPE_ROOT_PORT && msg_sent) {
> +        pcie_aer_msg_root_port(dev, msg);
> +    }
> +}
> +
> +static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint8_t first_bit = ffsl(err->status) - 1;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    int i;
> +
> +    assert(err->status);
> +    assert(err->status & (err->status - 1));
> +
> +    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
> +    errcap |= PCI_ERR_CAP_FEP(first_bit);
> +
> +    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
> +        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
> +            /* 7.10.8 Header Log Register */
> +            uint8_t *header_log =
> +                aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
> +            cpu_to_be32wu((uint32_t*)header_log, err->header[i]);
> +        }
> +    } else {
> +        assert(!(err->flags & PCIE_AER_ERR_TLP_PRESENT));
> +        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
> +    }
> +
> +    if ((err->flags & PCIE_AER_ERR_TLP_PRESENT) &&
> +        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2) &
> +         PCI_EXP_DEVCAP2_EETLPP)) {
> +        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
> +            /* 7.10.12 tlp prefix log register */
> +            uint8_t *prefix_log =
> +                aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
> +            cpu_to_be32wu((uint32_t*)prefix_log, err->prefix[i]);
> +        }
> +        errcap |= PCI_ERR_CAP_TLP;
> +    } else {
> +        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
> +               PCI_ERR_TLP_PREFIX_LOG_SIZE);
> +    }
> +    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
> +}
> +
> +static void pcie_aer_clear_log(PCIDevice *dev)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +
> +    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
> +                                 PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
> +
> +    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
> +    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
> +}
> +
> +static int pcie_aer_record_error(PCIDevice *dev,
> +                                 const PCIEAERErr *err)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    int fep = PCI_ERR_CAP_FEP(errcap);
> +
> +    assert(err->status);
> +    assert(err->status & (err->status - 1));
> +
> +    if (errcap & PCI_ERR_CAP_MHRE &&
> +        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
> +        /*  Not first error. queue error */
> +        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
> +            /* overflow */
> +            return -1;
> +        }
> +        return 0;
> +    }
> +
> +    pcie_aer_update_log(dev, err);
> +    return 0;
> +}
> +
> +static void pcie_aer_clear_error(PCIDevice *dev)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    PCIEAERLog *aer_log = &dev->exp.aer_log;
> +    PCIEAERErr err;
> +
> +    if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
> +        pcie_aer_clear_log(dev);
> +        return;
> +    }
> +
> +    /*
> +     * If more errors are queued, set corresponding bits in uncorrectable
> +     * error status.
> +     * We emulate uncorrectable error status register as W1CS.
> +     * So set bit in uncorrectable error status here again for multiple
> +     * error recording support.
> +     *
> +     * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
> +     */
> +    pcie_aer_rebuild_uncor_status(dev);
> +
> +    aer_log_del_err(aer_log, &err);
> +    pcie_aer_update_log(dev, &err);
> +}
> +
> +typedef struct PCIEAERInject {
> +    PCIDevice *dev;
> +    uint8_t *aer_cap;
> +    const PCIEAERErr *err;
> +    uint16_t devctl;
> +    uint16_t devsta;
> +    uint32_t status;

Find some better names to avoid confusion between devsta and status?
Also, status is always a single bit. Pass bit number?

> +    bool is_unsupported_request;

is_unsupported_request -> unsupported_request

> +    int *is_header_log_overflowed;

is_header_log_overflowed -> log_overflow
(also - why int?)

> +    PCIEAERMsg *msg;

This is too hacky. Just make msg and log overflow
structure members, don't pass pointers.

> +} PCIEAERInject;
> +
> +static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
> +                                      uint32_t uncor_status,
> +                                      bool is_advisory_nonfatal)
> +{
> +    PCIDevice *dev = inj->dev;
> +
> +    inj->devsta |= PCI_EXP_DEVSTA_CED;
> +    if (inj->is_unsupported_request) {
> +        inj->devsta |= PCI_EXP_DEVSTA_URD;
> +    }
> +    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, 
> inj->devsta);
> +
> +    if (inj->aer_cap) {
> +        uint32_t mask;
> +        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
> +                                   inj->status);
> +        mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
> +        if (mask & inj->status) {
> +            return false;
> +        }
> +        if (is_advisory_nonfatal) {
> +            uint32_t uncor_mask =
> +                pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
> +            if (!(uncor_mask & uncor_status)) {
> +                *inj->is_header_log_overflowed =
> +                    pcie_aer_record_error(dev, inj->err);
> +            }
> +            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
> +                                       uncor_status);
> +        }
> +    }
> +
> +    if (inj->is_unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) 
> {
> +        return false;
> +    }
> +    if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
> +        return false;
> +    }
> +
> +    inj->msg->severity = PCI_ERR_ROOT_CMD_COR_EN;
> +    return true;
> +}
> +
> +static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
> +{
> +    PCIDevice *dev = inj->dev;
> +    uint16_t cmd;
> +
> +    if (is_fatal) {
> +        inj->devsta |= PCI_EXP_DEVSTA_FED;
> +    } else {
> +        inj->devsta |= PCI_EXP_DEVSTA_NFED;
> +    }
> +    if (inj->is_unsupported_request) {
> +        inj->devsta |= PCI_EXP_DEVSTA_URD;
> +    }
> +    pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, 
> inj->devsta);
> +
> +    if (inj->aer_cap) {
> +        uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
> +        if (mask & inj->status) {
> +            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
> +                                       inj->status);
> +            return false;
> +        }
> +
> +        *inj->is_header_log_overflowed = pcie_aer_record_error(dev, 
> inj->err);
> +        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
> +                                   inj->status);
> +    }
> +
> +    cmd = pci_get_word(dev->config + PCI_COMMAND);
> +    if (inj->is_unsupported_request &&
> +        !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
> +        return false;
> +    }
> +    if (is_fatal) {
> +        if (!((cmd & PCI_COMMAND_SERR) ||
> +              (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
> +            return false;
> +        }
> +        inj->msg->severity = PCI_ERR_ROOT_CMD_FATAL_EN;
> +    } else {
> +        if (!((cmd & PCI_COMMAND_SERR) ||
> +              (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
> +            return false;
> +        }
> +        inj->msg->severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
> +    }
> +    return true;
> +}
> +
> +/*
> + * non-Function specific error must be recorded in all functions.
> + * It is the responsibility of the caller of this function.
> + * It is also caller's responsiblity to determine which function should
> + * report the rerror.
> + *
> + * 6.2.4 Error Logging
> + * 6.2.5 Sqeunce of Device Error Signaling and Logging Operations
> + * table 6-2: Flowchard Showing Sequence of Device Error Signaling and 
> Logging
> + *            Operations
> + */
> +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
> +{
> +    uint8_t *aer_cap = NULL;
> +    uint16_t devctl = 0;
> +    uint16_t devsta = 0;
> +    uint32_t status = err->status;
> +    bool is_unsupported_request =
> +        (!(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
> +         err->status == PCI_ERR_UNC_UNSUP);
> +    PCIEAERMsg msg;
> +    int is_header_log_overflowed = 0;
> +
> +    if (!pci_is_express(dev)) {
> +        return -1;
> +    }
> +
> +    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
> +        status &= PCI_ERR_COR_SUPPORTED;
> +    } else {
> +        status &= PCI_ERR_UNC_SUPPORTED;
> +    }
> +
> +    /* invalid status bit. one and only one bit must be set */
> +    if (!status || (status & (status - 1))) {
> +        return -1;
> +    }
> +
> +    if (dev->exp.aer_cap) {
> +        uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
> +        aer_cap = dev->config + dev->exp.aer_cap;
> +        devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
> +        devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
> +    }
> +    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
> +        PCIEAERInject inj = {
> +            .dev = dev,
> +            .aer_cap = aer_cap,
> +            .err = err,
> +            .devctl = devctl,
> +            .devsta = devsta,
> +            .status = status,
> +            .is_unsupported_request = is_unsupported_request,
> +            .is_header_log_overflowed = &is_header_log_overflowed,
> +            .msg = &msg,
> +        };
> +        if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
> +            return 0;
> +        }
> +    } else {
> +        bool is_fatal =
> +            pcie_aer_uncor_default_severity(status) ==
> +            PCI_ERR_ROOT_CMD_FATAL_EN;
> +
> +        if (aer_cap) {
> +            is_fatal = status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
> +        }
> +        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
> +            PCIEAERInject inj = {
> +                .dev = dev,
> +                .aer_cap = aer_cap,
> +                .err = err,
> +                .devctl = devctl,
> +                .devsta = devsta,
> +                .status = PCI_ERR_COR_ADV_NONFATAL,
> +                .is_unsupported_request = is_unsupported_request,
> +                .is_header_log_overflowed = &is_header_log_overflowed,
> +                .msg = &msg,
> +            };
> +            if (!pcie_aer_inject_cor_error(&inj, status, true)) {
> +                return 0;
> +            }
> +        } else {
> +            PCIEAERInject inj = {
> +                .dev = dev,
> +                .aer_cap = aer_cap,
> +                .err = err,
> +                .devctl = devctl,
> +                .devsta = devsta,
> +                .status = status,
> +                .is_unsupported_request = is_unsupported_request,
> +                .is_header_log_overflowed = &is_header_log_overflowed,
> +                .msg = &msg,
> +            };
> +            if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
> +                return 0;
> +            }
> +        }
> +    }
> +
> +    /* send up error message */
> +    msg.source_id = err->source_id;
> +    pcie_aer_msg(dev, &msg);
> +
> +    if (is_header_log_overflowed) {
> +        PCIEAERErr header_log_overflow = {
> +            .status = PCI_ERR_COR_HL_OVERFLOW,
> +            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
> +        };
> +        int ret = pcie_aer_inject_error(dev, &header_log_overflow);
> +        assert(!ret);
> +    }
> +    return 0;
> +}
> +
> +void pcie_aer_root_init(PCIDevice *dev)
> +{
> +    uint16_t pos = dev->exp.aer_cap;
> +
> +    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
> +                 PCI_ERR_ROOT_CMD_EN_MASK);
> +    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
> +                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
> +}
> +
> +void pcie_aer_root_reset(PCIDevice *dev)
> +{
> +    uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
> +
> +    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
> +
> +    /*
> +     * Advanced Error Interrupt Message Number in Root Error Status Register
> +     * must be updated by chip dependent code because it's chip dependent
> +     * which number is used.
> +     */
> +}
> +
> +static bool pcie_aer_root_does_trigger(uint32_t cmd, uint32_t status)
> +{
> +    return
> +        ((cmd & PCI_ERR_ROOT_CMD_COR_EN) && (status & PCI_ERR_ROOT_COR_RCV)) 
> ||
> +        ((cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
> +         (status & PCI_ERR_ROOT_NONFATAL_RCV)) ||
> +        ((cmd & PCI_ERR_ROOT_CMD_FATAL_EN) &&
> +         (status & PCI_ERR_ROOT_FATAL_RCV));
> +}
> +
> +void pcie_aer_root_write_config(PCIDevice *dev,
> +                                uint32_t addr, uint32_t val, int len,
> +                                uint32_t root_cmd_prev)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +
> +    /* root command register */
> +    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
> +    if (root_cmd & PCI_ERR_ROOT_CMD_EN_MASK) {
> +        /* 6.2.4.1.2 Interrupt Generation */
> +
> +        /* 0 -> 1 */
> +        uint32_t root_cmd_set = (root_cmd_prev ^ root_cmd) & root_cmd;
> +        uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
> +
> +        if (pci_msi_enabled(dev)) {
> +            if (pcie_aer_root_does_trigger(root_cmd_set, root_status)) {
> +                pci_msi_notify(dev, pcie_aer_root_get_vector(dev));
> +            }
> +        } else {
> +            int int_level = pcie_aer_root_does_trigger(root_cmd, 
> root_status);
> +            qemu_set_irq(dev->irq[dev->exp.aer_intx], int_level);
> +        }
> +    }
> +}
> +
> +static const VMStateDescription vmstate_pcie_aer_err = {
> +    .name = "PCIE_AER_ERROR",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .fields     = (VMStateField[]) {
> +        VMSTATE_UINT32(status, PCIEAERErr),
> +        VMSTATE_UINT16(source_id, PCIEAERErr),
> +        VMSTATE_UINT16(flags, PCIEAERErr),
> +        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
> +        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +#define VMSTATE_PCIE_AER_ERRS(_field, _state, _field_num, _vmsd, _type) { \
> +    .name       = (stringify(_field)),                                    \
> +    .version_id = 0,                                                      \
> +    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),     \
> +    .size       = sizeof(_type),                                          \
> +    .vmsd       = &(_vmsd),                                               \
> +    .flags      = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT,           \
> +    .offset     = vmstate_offset_pointer(_state, _field, _type),          \
> +}
> +
> +const VMStateDescription vmstate_pcie_aer_log = {
> +    .name = "PCIE_AER_ERROR_LOG",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .fields     = (VMStateField[]) {
> +        VMSTATE_UINT16(log_num, PCIEAERLog),
> +        VMSTATE_UINT16(log_max, PCIEAERLog),
> +        VMSTATE_PCIE_AER_ERRS(log, PCIEAERLog, log_num,
> +                              vmstate_pcie_aer_err, PCIEAERErr),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> diff --git a/hw/pcie_aer.h b/hw/pcie_aer.h
> new file mode 100644
> index 0000000..0f4b4eb
> --- /dev/null
> +++ b/hw/pcie_aer.h
> @@ -0,0 +1,94 @@
> +/*
> + * pcie_aer.h
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef QEMU_PCIE_AER_H
> +#define QEMU_PCIE_AER_H
> +
> +#include "hw.h"
> +
> +/* definitions which PCIExpressDevice uses */
> +
> +/* AER log */
> +struct PCIEAERLog {
> +    /* This structure is saved/loaded.
> +       So explicitly size them instead of unsigned int */
> +    uint16_t log_num;
> +
> +#define PCIE_AER_LOG_MAX_DEFAULT        8
> +#define PCIE_AER_LOG_MAX_MAX            128 /* what is appropriate? */

MAX MAX?

> +#define PCIE_AER_LOG_MAX_UNSET          0xffff
> +    uint16_t log_max;
> +
> +    PCIEAERErr *log;    /* ringed buffer */
> +};
> +
> +/* aer error message: error signaling message has only error sevirity and
> +   source id. See 2.2.8.3 error signaling messages */
> +struct PCIEAERMsg {
> +    /*
> +     * PCI_ERR_ROOT_CMD_{COR, NONFATAL, FATAL}_EN
> +     * = PCI_EXP_DEVCTL_{CERE, NFERE, FERE}
> +     */
> +    uint32_t severity;
> +
> +    uint16_t source_id; /* bdf */
> +};
> +
> +static inline bool
> +pcie_aer_msg_is_uncor(const PCIEAERMsg *msg)
> +{
> +    return msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN ||
> +        msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN;
> +}
> +
> +/* error */
> +struct PCIEAERErr {
> +    uint32_t status;    /* error status bits */
> +    uint16_t source_id; /* bdf */
> +
> +#define PCIE_AER_ERR_IS_CORRECTABLE     0x1     /* correctable/uncorrectable 
> */
> +#define PCIE_AER_ERR_MAYBE_ADVISORY     0x2     /* maybe advisory non-fatal 
> */
> +#define PCIE_AER_ERR_HEADER_VALID       0x4     /* TLP header is logged */
> +#define PCIE_AER_ERR_TLP_PRESENT        0x8     /* TLP Prefix is logged */
> +    uint16_t flags;
> +
> +    uint32_t header[4]; /* TLP header */
> +    uint32_t prefix[4]; /* TLP header prefix */
> +};
> +
> +extern const VMStateDescription vmstate_pcie_aer_log;
> +
> +void pcie_aer_init(PCIDevice *dev, uint16_t offset);
> +void pcie_aer_exit(PCIDevice *dev);
> +void pcie_aer_write_config(PCIDevice *dev,
> +                           uint32_t addr, uint32_t val, int len);
> +
> +/* aer root port */
> +void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector);
> +void pcie_aer_root_init(PCIDevice *dev);
> +void pcie_aer_root_reset(PCIDevice *dev);
> +void pcie_aer_root_write_config(PCIDevice *dev,
> +                                uint32_t addr, uint32_t val, int len,
> +                                uint32_t root_cmd_prev);
> +
> +/* error injection */
> +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +
> +#endif /* QEMU_PCIE_AER_H */
> diff --git a/qemu-common.h b/qemu-common.h
> index 21fc3a5..eb4d9bd 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -239,6 +239,9 @@ typedef struct PCIBus PCIBus;
>  typedef struct PCIDevice PCIDevice;
>  typedef struct PCIExpressDevice PCIExpressDevice;
>  typedef struct PCIBridge PCIBridge;
> +typedef struct PCIEAERMsg PCIEAERMsg;
> +typedef struct PCIEAERLog PCIEAERLog;
> +typedef struct PCIEAERErr PCIEAERErr;
>  typedef struct PCIEPort PCIEPort;
>  typedef struct PCIESlot PCIESlot;
>  typedef struct SerialState SerialState;
> -- 
> 1.7.1.1



reply via email to

[Prev in Thread] Current Thread [Next in Thread]