qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH v2 08/13] spapr_pci: Enable DDW


From: Alexey Kardashevskiy
Subject: Re: [Qemu-devel] [RFC PATCH v2 08/13] spapr_pci: Enable DDW
Date: Tue, 26 Aug 2014 18:11:04 +1000
User-agent: Mozilla/5.0 (X11; Linux i686 on x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.0

On 08/26/2014 05:14 PM, David Gibson wrote:
> On Fri, Aug 15, 2014 at 08:12:30PM +1000, Alexey Kardashevskiy wrote:
>> This implements DDW for emulated PHB.
>>
>> This advertises DDW in device tree.
>>
>> Since QEMU does not implement any 64bit DMA capable device, this hack
>> has been used to enable 64bit DMA on E1000:
>>
>> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
>> index 0fc29a0..131f80a 100644
>> --- a/hw/net/e1000.c
>> +++ b/hw/net/e1000.c
>> @@ -240,6 +240,7 @@ static const uint32_t mac_reg_init[] = {
>>      [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
>>                  E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
>>                  E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
>> +                E1000_STATUS_PCIX_MODE |
>>                  E1000_STATUS_LU,
>>      [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
>>                  E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
> 
> Are you planning to send a patch to enable 64-bit DMA in the e1000
> device properly?

Nope. The e1000 family has pci and pcix devices and so far QEMU emulated
pci one with its own vendor/device ids. I would either have to change
device id to pcix version and enable that pcix bit or add new e1000-pcix
device and I do not see any real demand on this as we have virtio-pci which
is way cooler and faster and everything :)


> 
>>
>> Signed-off-by: Alexey Kardashevskiy <address@hidden>
>> ---
>> Changes:
>> v2:
>> * tested on hacked emulated E1000
>> * implemented DDW reset on the PHB reset
>> * spapr_pci_ddw_remove/spapr_pci_ddw_reset are public for reuse by VFIO
>> ---
>>  hw/ppc/spapr_pci.c          | 96 
>> +++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/pci-host/spapr.h |  7 ++++
>>  2 files changed, 103 insertions(+)
>>
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 9b03d0d..cba8d9d 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -22,6 +22,7 @@
>>   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>   * THE SOFTWARE.
>>   */
>> +#include "sysemu/sysemu.h"
>>  #include "hw/hw.h"
>>  #include "hw/pci/pci.h"
>>  #include "hw/pci/msi.h"
>> @@ -498,6 +499,70 @@ void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr 
>> addr)
>>  }
>>  
>>  /*
>> + * Dynamic DMA windows
>> + */
>> +static int spapr_pci_ddw_query(sPAPRPHBState *sphb,
>> +                               uint32_t *windows_available,
>> +                               uint32_t *page_size_mask)
>> +{
>> +    *windows_available = 1;
>> +    *page_size_mask = DDW_PGSIZE_16M;
>> +
>> +    return 0;
>> +}
>> +
>> +static int spapr_pci_ddw_create(sPAPRPHBState *sphb, uint32_t page_shift,
>> +                                uint32_t window_shift, uint32_t liobn,
>> +                                sPAPRTCETable **ptcet)
>> +{
>> +    *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn,
>> +                                 SPAPR_PCI_TCE64_START, page_shift,
>> +                                 1ULL << (window_shift - page_shift),
>> +                                 true);
> 
> Does anything actually validate that the specified page_shift is one
> of the permitted/advertised ones?


Nope. Will fix.



>> +    if (!*ptcet) {
>> +        return -1;
>> +    }
>> +    memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset,
>> +                                spapr_tce_get_iommu(*ptcet));
>> +
>> +    return 0;
>> +}
>> +
>> +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet)
>> +{
>> +    memory_region_del_subregion(&sphb->iommu_root,
>> +                                spapr_tce_get_iommu(tcet));
>> +    spapr_tce_free_table(tcet);
> 
> Ok, relating to my comment in the previous patch, ddw_num doesn't seem
> to be decremented here either.


@ddw_num is an id, it just have to be unique. If I decrement it, then I'll
have to track what numbers are in use.


>> +
>> +    return 0;
>> +}
>> +
>> +static int spapr_pci_remove_ddw_cb(Object *child, void *opaque)
>> +{
>> +    sPAPRTCETable *tcet;
>> +
>> +    tcet = (sPAPRTCETable *) object_dynamic_cast(child, 
>> TYPE_SPAPR_TCE_TABLE);
>> +
>> +    /* Delete all dynamic windows, i.e. every except the default one with 
>> #0 */
>> +    if (tcet && SPAPR_PCI_DMA_WINDOW_NUM(tcet->liobn)) {
>> +        sPAPRPHBState *sphb = opaque;
>> +        sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>> +
>> +        spc->ddw_remove(sphb, tcet);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +int spapr_pci_ddw_reset(sPAPRPHBState *sphb)
>> +{
>> +    object_child_foreach(OBJECT(sphb), spapr_pci_remove_ddw_cb, sphb);
>> +    sphb->ddw_num = 0;
> 
> So, you do reset ddw_num here, but since it is incremented in the
> generic RTAS code, this smells like a layering violation.

Yeah, good idea to keep in one file at least. Will fix.



>> +
>> +    return 0;
>> +}
>> +
>> +/*
>>   * PHB PCI device
>>   */
>>  static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int 
>> devfn)
>> @@ -671,6 +736,12 @@ static int spapr_phb_children_reset(Object *child, void 
>> *opaque)
>>  
>>  static void spapr_phb_reset(DeviceState *qdev)
>>  {
>> +    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(qdev);
>> +
>> +    if (spc->ddw_reset) {
>> +        spc->ddw_reset(SPAPR_PCI_HOST_BRIDGE(qdev));
>> +    }
>> +
>>      /* Reset the IOMMU state */
>>      object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
>>  }
>> @@ -685,6 +756,7 @@ static Property spapr_phb_properties[] = {
>>      DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
>>      DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
>>                         SPAPR_PCI_IO_WIN_SIZE),
>> +    DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, false),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> @@ -802,6 +874,10 @@ static void spapr_phb_class_init(ObjectClass *klass, 
>> void *data)
>>      set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>>      dc->cannot_instantiate_with_device_add_yet = false;
>>      spc->finish_realize = spapr_phb_finish_realize;
>> +    spc->ddw_query = spapr_pci_ddw_query;
>> +    spc->ddw_create = spapr_pci_ddw_create;
>> +    spc->ddw_remove = spapr_pci_ddw_remove;
>> +    spc->ddw_reset = spapr_pci_ddw_reset;
>>  }
>>  
>>  static const TypeInfo spapr_phb_info = {
>> @@ -885,6 +961,13 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>      uint32_t interrupt_map_mask[] = {
>>          cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
>>      uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
>> +    uint32_t ddw_applicable[] = {
>> +        RTAS_IBM_QUERY_PE_DMA_WINDOW,
>> +        RTAS_IBM_CREATE_PE_DMA_WINDOW,
>> +        RTAS_IBM_REMOVE_PE_DMA_WINDOW
>> +    };
>> +    uint32_t ddw_extensions[] = { 1, RTAS_IBM_RESET_PE_DMA_WINDOW };
>> +    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(phb);
>>  
>>      /* Start populating the FDT */
>>      sprintf(nodename, "address@hidden" PRIx64, phb->buid);
>> @@ -914,6 +997,19 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
>>      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
>>  
>> +    /* Dynamic DMA window */
>> +    if (phb->ddw_enabled &&
>> +        spc->ddw_query && spc->ddw_create && spc->ddw_remove) {
>> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", 
>> &ddw_applicable,
>> +                         sizeof(ddw_applicable)));
>> +
>> +        if (spc->ddw_reset) {
>> +            /* When enabled, the guest will remove the default 32bit window 
>> */
> 
> I guess it's not really relevant here, but the reason for availability
> of reset causing the guest to remove the default window seems unclear.


Add "warn"? This is what sles11sp3 does, recent kernels do not try removing
default window though. But by PAPR 2.7 the platform must implement "reset".
Anyway my plan is to repost without ddw_reset() at all and add it later.



>> +            _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
>> +                             &ddw_extensions, sizeof(ddw_extensions)));
>> +        }
>> +    }
>> +
>>      /* Build the interrupt-map, this must matches what is done
>>       * in pci_spapr_map_irq
>>       */
>> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
>> index a1fdbb2..0b40fcf 100644
>> --- a/include/hw/pci-host/spapr.h
>> +++ b/include/hw/pci-host/spapr.h
>> @@ -104,6 +104,8 @@ struct sPAPRPHBState {
>>      int32_t msi_devs_num;
>>      spapr_pci_msi_mig *msi_devs;
>>  
>> +    bool ddw_enabled;
>> +
>>      QLIST_ENTRY(sPAPRPHBState) list;
>>  };
>>  
>> @@ -126,6 +128,9 @@ struct sPAPRPHBVFIOState {
>>  
>>  #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
>>  
>> +/* Default 64bit dynamic window offset */
>> +#define SPAPR_PCI_TCE64_START        0x8000000000000000ULL
>> +
>>  static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int 
>> pin)
>>  {
>>      return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
>> @@ -144,5 +149,7 @@ void spapr_pci_rtas_init(void);
>>  sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid);
>>  PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
>>                                uint32_t config_addr);
>> +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet);
>> +int spapr_pci_ddw_reset(sPAPRPHBState *sphb);
>>  
>>  #endif /* __HW_SPAPR_PCI_H__ */
> 


-- 
Alexey



reply via email to

[Prev in Thread] Current Thread [Next in Thread]