qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [V15 3/4] hw/i386: Introduce AMD IOMMU


From: David Kiarie
Subject: Re: [Qemu-devel] [V15 3/4] hw/i386: Introduce AMD IOMMU
Date: Tue, 9 Aug 2016 16:17:36 +0300

On Tue, Aug 9, 2016 at 4:01 PM, Valentine Sinitsyn <
address@hidden> wrote:

> Hi all,
>
> On 09.08.2016 17:52, David Kiarie wrote:
>
>>
>>
>> On Tue, Aug 9, 2016 at 8:44 AM, Peter Xu <address@hidden
>> <mailto:address@hidden>> wrote:
>>
>>     On Tue, Aug 02, 2016 at 11:39:06AM +0300, David Kiarie wrote:
>>
>>     [...]
>>
>>     > +/* invalidate internal caches for devid */
>>     > +typedef struct QEMU_PACKED {
>>     > +#ifdef HOST_WORDS_BIGENDIAN
>>     > +    uint64_t devid;                /* device to invalidate   */
>>     > +    uint64_t reserved_1:44;
>>     > +    uint64_t type:4;               /* command type           */
>>     > +#else
>>     > +    uint64_t devid;
>>     > +    uint64_t reserved_1:44;
>>     > +    uint64_t type:4;
>>     > +#endif /* __BIG_ENDIAN_BITFIELD */
>>
>>     Guess you forgot to reverse the order of fields in one of above block.
>>
>>
>> Yes, I forgot to reverse order of fields here.
>>
>>
>>
>>     [...]
>>
>>     > +/* load adddress translation info for devid into translation cache
>> */
>>     > +typedef struct QEMU_PACKED {
>>     > +#ifdef HOST_WORDS_BIGENDIAN
>>     > +    uint64_t type:4;          /* command type       */
>>     > +    uint64_t reserved_2:8;
>>     > +    uint64_t pasid_19_0:20;
>>     > +    uint64_t pfcount_7_0:8;
>>     > +    uint64_t reserved_1:8;
>>     > +    uint64_t devid;           /* related devid      */
>>     > +#else
>>     > +    uint64_t devid;
>>     > +    uint64_t reserved_1:8;
>>     > +    uint64_t pfcount_7_0:8;
>>     > +    uint64_t pasid_19_0:20;
>>     > +    uint64_t reserved_2:8;
>>     > +    uint64_t type:4;
>>     > +#endif /* __BIG_ENDIAN_BITFIELD */
>>
>>     For this one, "devid" looks like a 16 bits field?
>>
>>
>> Right. should be 16 bits.
>>
>>
>>
>>     [...]
>>
>>     > +/* issue a PCIe completion packet for devid */
>>     > +typedef struct QEMU_PACKED {
>>     > +#ifdef HOST_WORDS_BIGENDIAN
>>     > +    uint32_t devid;               /* related devid      */
>>     > +    uint32_t reserved_1;
>>     > +#else
>>     > +    uint32_t reserved_1;
>>     > +    uint32_t devid;
>>     > +#endif /* __BIG_ENDIAN_BITFIELD */
>>
>>     Here I am not sure we need this "#ifdef".
>>
>>
>> There's an error here but it's not with the #ifdef but instead I have
>> not set the right bit on the bitfields - for instance devid should be 16.
>>
>>
>>
>>     [...]
>>
>>     > +/* external write */
>>     > +static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
>>     > +{
>>     > +    uint16_t romask = lduw_le_p(&s->romask[addr]);
>>     > +    uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
>>     > +    uint16_t oldval = lduw_le_p(&s->mmior[addr]);
>>     > +    stw_le_p(&s->mmior[addr], (val & ~(val & w1cmask)) | (romask &
>> oldval));
>>
>>     I think the above is problematic, e.g., what if we write 1 to one of
>>     the romask while it's 0 originally? In that case, the RO bit will be
>>     written to 1.
>>
>>     Maybe we need:
>>
>>       stw_le_p(&s->mmior[addr], ((oldval & romask) | (val & ~romask)) & \
>>                                 (val & w1cmask));
>>
>>     Same question to the below two functions.
>>
>>
>> Right. I was very determined to come up with my algo but failed horribly
>> ;-)
>>
>>
>>
>>     > +}
>>     > +
>>     > +static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
>>     > +{
>>     > +    uint32_t romask = ldl_le_p(&s->romask[addr]);
>>     > +    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
>>     > +    uint32_t oldval = ldl_le_p(&s->mmior[addr]);
>>     > +    stl_le_p(&s->mmior[addr], (val & ~(val & w1cmask)) | (romask &
>> oldval));
>>     > +}
>>     > +
>>     > +static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
>>     > +{
>>     > +    uint64_t romask = ldq_le_p(&s->romask[addr]);
>>     > +    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
>>     > +    uint32_t oldval = ldq_le_p(&s->mmior[addr]);
>>     > +    stq_le_p(&s->mmior[addr], (val & ~(val & w1cmask)) | (romask &
>> oldval));
>>     > +}
>>     > +
>>     > +/* OR a 64-bit register with a 64-bit value */
>>     > +static bool amdvi_orq(AMDVIState *s, hwaddr addr, uint64_t val)
>>
>>     Nit: This function name gives me an illusion that it's a write op, not
>>     read. IMHO it'll be better we directly use amdvi_readq() for all the
>>     callers of this function, which is more clear to me.
>>
>>     > +{
>>     > +    return amdvi_readq(s, addr) | val;
>>     > +}
>>     > +
>>     > +/* OR a 64-bit register with a 64-bit value storing result in the
>> register */
>>     > +static void amdvi_orassignq(AMDVIState *s, hwaddr addr, uint64_t
>> val)
>>     > +{
>>     > +    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
>>     > +}
>>     > +
>>     > +/* AND a 64-bit register with a 64-bit value storing result in the
>> register */
>>     > +static void amdvi_and_assignq(AMDVIState *s, hwaddr addr, uint64_t
>> val)
>>
>>     Nit: the name is not matched with above:
>>
>>       amdvi_{or|and}assign[qw]
>>
>>     Though I would prefer:
>>
>>       amdvi_assign_[qw]_{or|and}
>>
>>
>> Your naming sounds better.
>>
>>
>>
>>     [...]
>>
>>     > +static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
>>     > +{
>>     > +    /* event logging not enabled */
>>     > +    if (!s->evtlog_enabled || amdvi_orq(s, AMDVI_MMIO_STATUS,
>>     > +        AMDVI_MMIO_STATUS_EVT_OVF)) {
>>     > +        return;
>>     > +    }
>>     > +
>>     > +    /* event log buffer full */
>>     > +    if (s->evtlog_tail >= s->evtlog_len) {
>>     > +        amdvi_orassignq(s, AMDVI_MMIO_STATUS,
>> AMDVI_MMIO_STATUS_EVT_OVF);
>>     > +        /* generate interrupt */
>>     > +        amdvi_generate_msi_interrupt(s);
>>     > +        return;
>>     > +    }
>>     > +
>>     > +    if (dma_memory_write(&address_space_memory, s->evtlog_len +
>> s->evtlog_tail,
>>     > +        &evt, AMDVI_EVENT_LEN)) {
>>
>>     Check with MEMTX_OK?
>>
>>
>> I'm not sure what exactly you mean here.
>>
>>
>>
>>     [...]
>>
>>     > +/*
>>     > + * AMDVi event structure
>>     > + *    0:15   -> DeviceID
>>     > + *    55:63  -> event type + miscellaneous info
>>     > + *    64:127 -> related address
>>     > + */
>>     > +static void amdvi_encode_event(uint64_t *evt, uint16_t devid,
>> uint64_t addr,
>>     > +                               uint16_t info)
>>     > +{
>>     > +    amdvi_setevent_bits(evt, devid, 0, 16);
>>     > +    amdvi_setevent_bits(evt, info, 55, 8);
>>     > +    amdvi_setevent_bits(evt, addr, 63, 64);
>>                                           ^^
>>                                     should here be 64?
>>
>>     Also, I am not sure whether we need this amdvi_setevent_bits() if it's
>>     only used in this function. Though not a big problem for me.
>>
>>
>> It's only used in this function but I actually wrote his mainly for
>> future use. The idea is that various events encode totally different
>> information while the above is an over-simplified version to encode
>> information common to most events. In case an event wants to encode more
>> information it would turn out much more easier.
>>
>>
>>
>>     > +}
>>     > +/* log an error encountered page-walking
>>
>>     "during page-walking"
>>
>>
>> "encountered page-walking"  sounds right to me. "page-walking" is a
>> verb, in continuous tense, right ? how about I say "during hacking" ;-)
>>
> I'm a non-native, but: isn't "page-walking" a gerund here? Otherwise,
> "during hacking" sounds good to me.
>

"during hacking"  sounds redundant - why should I repeat that an action is
currently happening by using 'during' while that's already clear since it's
in continuous tense ? page-walking sounds like a proper verb IMHO.


> I also got comments on wording sometimes. In this cases I assume my
> initial phrase was misleading, and try to re-phrase it.
>
> Valentine
>
>
>
>>
>>     > + *
>>     > + * @addr: virtual address in translation request
>>     > + */
>>     > +static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
>>     > +                             hwaddr addr, uint16_t info)
>>     > +{
>>     > +    uint64_t evt[4];
>>     > +
>>     > +    info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
>>     > +    amdvi_encode_event(evt, devid, addr, info);
>>     > +    amdvi_log_event(s, evt);
>>     > +    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
>>     > +            PCI_STATUS_SIG_TARGET_ABORT);
>>
>>     Nit: maybe we can provide a function for setting this bit.
>>
>>
>> I've actually being ignoring these since Qemu doesn't seem to care about
>> them.
>>
>>
>>
>>     [...]
>>
>>     > +static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
>>     > +                               uint64_t gpa, IOMMUTLBEntry
>> to_cache,
>>     > +                               uint16_t domid)
>>     > +{
>>     > +    AMDVIIOTLBEntry *entry = g_malloc(sizeof(*entry));
>>     > +    uint64_t *key = g_malloc(sizeof(key));
>>     > +    uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
>>     > +
>>     > +    /* don't cache erroneous translations */
>>     > +    if (to_cache.perm != IOMMU_NONE) {
>>     > +        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid),
>> PCI_SLOT(devid),
>>     > +                PCI_FUNC(devid), gpa, to_cache.translated_addr);
>>     > +
>>     > +        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
>>     > +            trace_amdvi_iotlb_reset();
>>
>>     We'd better put this trace into amdvi_iotlb_reset().
>>
>>     > +            amdvi_iotlb_reset(s);
>>     > +        }
>>     > +
>>     > +        entry->gfn = gfn;
>>     > +        entry->domid = domid;
>>     > +        entry->perms = to_cache.perm;
>>     > +        entry->translated_addr = to_cache.translated_addr;
>>     > +        entry->page_mask = to_cache.addr_mask;
>>     > +        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
>>     > +        g_hash_table_replace(s->iotlb, key, entry);
>>     > +    }
>>     > +}
>>     > +
>>     > +static void amdvi_completion_wait(AMDVIState *s,
>> CMDCompletionWait *wait)
>>     > +{
>>     > +    /* pad the last 3 bits */
>>     > +    hwaddr addr = cpu_to_le64(wait->store_addr << 3);
>>
>>     Is this correct? IMO it should be:
>>
>>       hwaddr addr = le64_to_cpu(wait->store_addr) << 3;
>>
>>     > +    uint64_t data = cpu_to_le64(wait->store_data);
>>
>>     Maybe:
>>
>>       uint64_t data = le64_to_cpu(wait->store_data);
>>
>>     ?
>>
>>
>> I should fix these too.
>>
>>
>>
>>
>>
>>     > +
>>     > +    if (wait->reserved) {
>>     > +        amdvi_log_illegalcom_error(s, wait->type, s->cmdbuf +
>> s->cmdbuf_head);
>>     > +    }
>>     > +
>>     > +    if (wait->completion_store) {
>>     > +        if (dma_memory_write(&address_space_memory, addr, &data,
>>     > +            AMDVI_COMPLETION_DATA_SIZE))
>>     > +        {
>>
>>     Left bracket is better moved upward to follow the coding style.
>>
>>
>> To fix.
>>
>>
>>
>>     > +            trace_amdvi_completion_wait_fail(addr);
>>     > +        }
>>     > +    }
>>
>>     Thanks,
>>
>>     -- peterx
>>
>>
>>


reply via email to

[Prev in Thread] Current Thread [Next in Thread]