[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 10/15] exec: RCUify AddressSpaceDispatch
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH 10/15] exec: RCUify AddressSpaceDispatch |
Date: |
Wed, 28 Jan 2015 10:44:33 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.4.0 |
On 28/01/2015 06:45, Fam Zheng wrote:
> On Thu, 01/22 15:47, Paolo Bonzini wrote:
>> Note that even after this patch, most callers of address_space_*
>> functions must still be under the big QEMU lock, otherwise the memory
>> region returned by address_space_translate can disappear as soon as
>> address_space_translate returns. This will be fixed in the next part
>> of this series.
>>
>> Signed-off-by: Paolo Bonzini <address@hidden>
>> ---
>> cpu-exec.c | 13 ++++++++++++-
>> cpus.c | 2 +-
>> cputlb.c | 8 ++++++--
>> exec.c | 34 ++++++++++++++++++++++++++--------
>> hw/i386/intel_iommu.c | 3 +++
>> hw/pci-host/apb.c | 1 +
>> hw/ppc/spapr_iommu.c | 1 +
>> include/exec/exec-all.h | 1 +
>> 8 files changed, 51 insertions(+), 12 deletions(-)
>>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 12473ff..edc5eb9 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -26,6 +26,7 @@
>> #include "qemu/timer.h"
>> #include "exec/address-spaces.h"
>> #include "exec/memory-internal.h"
>> +#include "qemu/rcu.h"
>>
>> /* -icount align implementation. */
>>
>> @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
>>
>> void cpu_reload_memory_map(CPUState *cpu)
>> {
>> + AddressSpaceDispatch *d;
>> +
>> + if (qemu_in_vcpu_thread()) {
>> + rcu_read_unlock();
>
> Could you explain why we need to split the critical section? Maybe a line of
> comment helps here.
It is needed because otherwise the guest could prolong the critical
section as much as it desires.
Currently, this is prevented by the I/O thread's kicking of the VCPU
thread (iothread_requesting_mutex, qemu_cpu_kick_thread) but this will
go away once TCG's execution moves out of the global mutex.
This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
only protects cpu->as->dispatch. Since we reload it below, we can split
the critical section.
Paolo
>> + rcu_read_lock();
>> + }
>> +
>> /* The CPU and TLB are protected by the iothread lock. */
>> - AddressSpaceDispatch *d = cpu->as->dispatch;
>> + d = atomic_rcu_read(&cpu->as->dispatch);
>> cpu->memory_dispatch = d;
>> tlb_flush(cpu, 1);
>> }
>> @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env)
>> * an instruction scheduling constraint on modern architectures. */
>> smp_mb();
>>
>> + rcu_read_lock();
>> +
>> if (unlikely(exit_request)) {
>> cpu->exit_request = 1;
>> }
>> @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env)
>> } /* for(;;) */
>>
>> cc->cpu_exec_exit(cpu);
>> + rcu_read_unlock();
>>
>> /* fail safe : never use current_cpu outside cpu_exec() */
>> current_cpu = NULL;
>> diff --git a/cpus.c b/cpus.c
>> index 3a5323b..b02c793 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu)
>> return qemu_thread_is_self(cpu->thread);
>> }
>>
>> -static bool qemu_in_vcpu_thread(void)
>> +bool qemu_in_vcpu_thread(void)
>> {
>> return current_cpu && qemu_cpu_is_self(current_cpu);
>> }
>> diff --git a/cputlb.c b/cputlb.c
>> index f92db5e..38f2151 100644
>> --- a/cputlb.c
>> +++ b/cputlb.c
>> @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env,
>> target_ulong vaddr,
>> }
>>
>> /* Add a new TLB entry. At most one entry for a given virtual address
>> - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
>> - supplied size is only used by tlb_flush_page. */
>> + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
>> + * supplied size is only used by tlb_flush_page.
>> + *
>> + * Called from TCG-generated code, which is under an RCU read-side
>> + * critical section.
>> + */
>> void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>> hwaddr paddr, int prot,
>> int mmu_idx, target_ulong size)
>> diff --git a/exec.c b/exec.c
>> index 762ec76..262e8bc 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -115,6 +115,8 @@ struct PhysPageEntry {
>> typedef PhysPageEntry Node[P_L2_SIZE];
>>
>> typedef struct PhysPageMap {
>> + struct rcu_head rcu;
>> +
>> unsigned sections_nb;
>> unsigned sections_nb_alloc;
>> unsigned nodes_nb;
>> @@ -124,6 +126,8 @@ typedef struct PhysPageMap {
>> } PhysPageMap;
>>
>> struct AddressSpaceDispatch {
>> + struct rcu_head rcu;
>> +
>> /* This is a multi-level map on the physical address space.
>> * The bottom level has pointers to MemoryRegionSections.
>> */
>> @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>> && mr != &io_mem_watch;
>> }
>>
>> +/* Called from RCU critical section */
>> static MemoryRegionSection
>> *address_space_lookup_region(AddressSpaceDispatch *d,
>> hwaddr addr,
>> bool
>> resolve_subpage)
>> @@ -330,6 +335,7 @@ static MemoryRegionSection
>> *address_space_lookup_region(AddressSpaceDispatch *d,
>> return section;
>> }
>>
>> +/* Called from RCU critical section */
>> static MemoryRegionSection *
>> address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr,
>> hwaddr *xlat,
>> hwaddr *plen, bool resolve_subpage)
>> @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as,
>> hwaddr addr,
>> MemoryRegion *mr;
>> hwaddr len = *plen;
>>
>> + rcu_read_lock();
>> for (;;) {
>> - section = address_space_translate_internal(as->dispatch, addr,
>> &addr, plen, true);
>> + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
>> + section = address_space_translate_internal(d, addr, &addr, plen,
>> true);
>> mr = section->mr;
>>
>> if (!mr->iommu_ops) {
>> @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as,
>> hwaddr addr,
>>
>> *plen = len;
>> *xlat = addr;
>> + rcu_read_unlock();
>> return mr;
>> }
>>
>> +/* Called from RCU critical section */
>> MemoryRegionSection *
>> address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
>> hwaddr *xlat, hwaddr *plen)
>> @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool
>> enable)
>> in_migration = enable;
>> }
>>
>> +/* Called from RCU critical section */
>> hwaddr memory_region_section_get_iotlb(CPUState *cpu,
>> MemoryRegionSection *section,
>> target_ulong vaddr,
>> @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map,
>> AddressSpace *as,
>>
>> MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
>> {
>> - MemoryRegionSection *sections = cpu->memory_dispatch->map.sections;
>> + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
>> + MemoryRegionSection *sections = d->map.sections;
>>
>> return sections[index & ~TARGET_PAGE_MASK].mr;
>> }
>> @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener)
>> as->next_dispatch = d;
>> }
>>
>> +static void address_space_dispatch_free(AddressSpaceDispatch *d)
>> +{
>> + phys_sections_free(&d->map);
>
>
> If I understand it, this code doesn't hold iothread lock when releasing the
> memory region, but in one of the memory region destructors,
> memory_region_destructor_ram_from_ptr:
>
> void qemu_ram_free_from_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> /* This assumes the iothread lock is taken here too. */
> qemu_mutex_lock_ramlist();
> QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> ...
>
> Is the comment stale or I missed something?
>
> Fam
>
>> + g_free(d);
>> +}
>> +
>> static void mem_commit(MemoryListener *listener)
>> {
>> AddressSpace *as = container_of(listener, AddressSpace,
>> dispatch_listener);
>> @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener)
>>
>> phys_page_compact_all(next, next->map.nodes_nb);
>>
>> - as->dispatch = next;
>> -
>> + atomic_rcu_set(&as->dispatch, next);
>> if (cur) {
>> - phys_sections_free(&cur->map);
>> - g_free(cur);
>> + call_rcu(cur, address_space_dispatch_free, rcu);
>> }
>> }
>>
>> @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as)
>> AddressSpaceDispatch *d = as->dispatch;
>>
>> memory_listener_unregister(&as->dispatch_listener);
>> - g_free(d);
>> - as->dispatch = NULL;
>> + atomic_rcu_set(&as->dispatch, NULL);
>> + if (d) {
>> + call_rcu(d, address_space_dispatch_free, rcu);
>> + }
>> }
>>
>> static void memory_map_init(void)
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 0a4282a..7da70ff 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>>
>> /* Map dev to context-entry then do a paging-structures walk to do a iommu
>> * translation.
>> + *
>> + * Called from RCU critical section.
>> + *
>> * @bus_num: The bus number
>> * @devfn: The devfn, which is the combined of device and function number
>> * @is_write: The access is a write operation
>> diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
>> index f573875..832b6c7 100644
>> --- a/hw/pci-host/apb.c
>> +++ b/hw/pci-host/apb.c
>> @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void
>> *opaque, int devfn)
>> return &is->iommu_as;
>> }
>>
>> +/* Called from RCU critical section */
>> static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>> bool is_write)
>> {
>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> index da47474..ba003da 100644
>> --- a/hw/ppc/spapr_iommu.c
>> +++ b/hw/ppc/spapr_iommu.c
>> @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t
>> liobn)
>> return NULL;
>> }
>>
>> +/* Called from RCU critical section */
>> static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr
>> addr,
>> bool is_write)
>> {
>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>> index bb3fd37..8eb0db3 100644
>> --- a/include/exec/exec-all.h
>> +++ b/include/exec/exec-all.h
>> @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start,
>> tb_page_addr_t end,
>> void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
>> int is_cpu_write_access);
>> #if !defined(CONFIG_USER_ONLY)
>> +bool qemu_in_vcpu_thread(void);
>> void cpu_reload_memory_map(CPUState *cpu);
>> void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
>> /* cputlb.c */
>> --
>> 1.8.3.1
>>
>>
- Re: [Qemu-devel] [PATCH 01/15] rcu: add rcu library, (continued)
- [Qemu-devel] [PATCH 03/15] rcu: allow nesting of rcu_read_lock/rcu_read_unlock, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 05/15] memory: remove assertion on memory_region_destroy, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 06/15] memory: protect current_map by RCU, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 08/15] exec: introduce cpu_reload_memory_map, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 10/15] exec: RCUify AddressSpaceDispatch, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 04/15] rcu: add call_rcu, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 07/15] memory: avoid ref/unref in memory_region_find, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 12/15] exec: protect mru_block with RCU, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 11/15] rcu: introduce RCU-enabled QLIST, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 14/15] exec: convert ram_list to QLIST, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 13/15] cosmetic changes preparing for the following patches, Paolo Bonzini, 2015/01/22
- [Qemu-devel] [PATCH 09/15] exec: make iotlb RCU-friendly, Paolo Bonzini, 2015/01/22