qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH V2 01/11] machine: alloc-anon option


From: Igor Mammedov
Subject: Re: [PATCH V2 01/11] machine: alloc-anon option
Date: Tue, 16 Jul 2024 11:19:55 +0200

On Sun, 30 Jun 2024 12:40:24 -0700
Steve Sistare <steven.sistare@oracle.com> wrote:

> Allocate anonymous memory using mmap MAP_ANON or memfd_create depending
> on the value of the anon-alloc machine property.  This affects
> memory-backend-ram objects, guest RAM created with the global -m option
> but without an associated memory-backend object and without the -mem-path
> option
nowadays, all machines were converted to use memory backend for VM RAM.
so -m option implicitly creates memory-backend object,
which will be either MEMORY_BACKEND_FILE if -mem-path present
or MEMORY_BACKEND_RAM otherwise.


> To access the same memory in the old and new QEMU processes, the memory
> must be mapped shared.  Therefore, the implementation always sets

> RAM_SHARED if alloc-anon=memfd, except for memory-backend-ram, where the
> user must explicitly specify the share option.  In lieu of defining a new
so statement at the top that memory-backend-ram is affected is not
really valid? 

> RAM flag, at the lowest level the implementation uses RAM_SHARED with fd=-1
> as the condition for calling memfd_create.

In general I do dislike adding yet another option that will affect
guest RAM allocation (memory-backends  should be sufficient).

However I do see that you need memfd for device memory (vram, roms, ...).
Can we just use memfd/shared unconditionally for those and
avoid introducing a new confusing option?


> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  hw/core/machine.c   | 24 ++++++++++++++++++++++++
>  include/hw/boards.h |  1 +
>  qapi/machine.json   | 14 ++++++++++++++
>  qemu-options.hx     | 13 +++++++++++++
>  system/memory.c     | 12 +++++++++---
>  system/physmem.c    | 38 +++++++++++++++++++++++++++++++++++++-
>  system/trace-events |  3 +++
>  7 files changed, 101 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 655d75c..7ca2ad0 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -454,6 +454,20 @@ static void machine_set_mem_merge(Object *obj, bool 
> value, Error **errp)
>      ms->mem_merge = value;
>  }
>  
> +static int machine_get_anon_alloc(Object *obj, Error **errp)
> +{
> +    MachineState *ms = MACHINE(obj);
> +
> +    return ms->anon_alloc;
> +}
> +
> +static void machine_set_anon_alloc(Object *obj, int value, Error **errp)
> +{
> +    MachineState *ms = MACHINE(obj);
> +
> +    ms->anon_alloc = value;
> +}
> +
>  static bool machine_get_usb(Object *obj, Error **errp)
>  {
>      MachineState *ms = MACHINE(obj);
> @@ -1066,6 +1080,11 @@ static void machine_class_init(ObjectClass *oc, void 
> *data)
>      object_class_property_set_description(oc, "mem-merge",
>          "Enable/disable memory merge support");
>  
> +    object_class_property_add_enum(oc, "anon-alloc", "AnonAllocOption",
> +                                   &AnonAllocOption_lookup,
> +                                   machine_get_anon_alloc,
> +                                   machine_set_anon_alloc);
> +
>      object_class_property_add_bool(oc, "usb",
>          machine_get_usb, machine_set_usb);
>      object_class_property_set_description(oc, "usb",
> @@ -1416,6 +1435,11 @@ static bool create_default_memdev(MachineState *ms, 
> const char *path, Error **er
>      if (!object_property_set_int(obj, "size", ms->ram_size, errp)) {
>          goto out;
>      }
> +    if (!object_property_set_bool(obj, "share",
> +                                  ms->anon_alloc == ANON_ALLOC_OPTION_MEMFD,
> +                                  errp)) {
> +        goto out;
> +    }
>      object_property_add_child(object_get_objects_root(), mc->default_ram_id,
>                                obj);
>      /* Ensure backend's memory region name is equal to mc->default_ram_id */
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 73ad319..77f16ad 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -383,6 +383,7 @@ struct MachineState {
>      bool enable_graphics;
>      ConfidentialGuestSupport *cgs;
>      HostMemoryBackend *memdev;
> +    AnonAllocOption anon_alloc;
>      /*
>       * convenience alias to ram_memdev_id backend memory region
>       * or to numa container memory region
> diff --git a/qapi/machine.json b/qapi/machine.json
> index 2fd3e9c..9173953 100644
> --- a/qapi/machine.json
> +++ b/qapi/machine.json
> @@ -1881,3 +1881,17 @@
>  { 'command': 'x-query-interrupt-controllers',
>    'returns': 'HumanReadableText',
>    'features': [ 'unstable' ]}
> +
> +##
> +# @AnonAllocOption:
> +#
> +# An enumeration of the options for allocating anonymous guest memory.
> +#
> +# @mmap: allocate using mmap MAP_ANON
> +#
> +# @memfd: allocate using memfd_create
> +#
> +# Since: 9.1
> +##
> +{ 'enum': 'AnonAllocOption',
> +  'data': [ 'mmap', 'memfd' ] }
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 8ca7f34..595b693 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
>      "                nvdimm=on|off controls NVDIMM support (default=off)\n"
>      "                memory-encryption=@var{} memory encryption object to 
> use (default=none)\n"
>      "                hmat=on|off controls ACPI HMAT support (default=off)\n"
> +    "                anon-alloc=mmap|memfd allocate anonymous guest RAM 
> using mmap MAP_ANON or memfd_create (default: mmap)\n"
>      "                memory-backend='backend-id' specifies explicitly 
> provided backend for main RAM (default=none)\n"
>      "                
> cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
>      QEMU_ARCH_ALL)
> @@ -101,6 +102,18 @@ SRST
>          Enables or disables ACPI Heterogeneous Memory Attribute Table
>          (HMAT) support. The default is off.
>  
> +    ``anon-alloc=mmap|memfd``
> +        Allocate anonymous guest RAM using mmap MAP_ANON (the default)
> +        or memfd_create.  This affects memory-backend-ram objects,
> +        RAM created with the global -m option but without an
> +        associated memory-backend object and without the -mem-path
> +        option, and various memory regions such as ROMs that are
> +        allocated when devices are created.  This option does not
> +        affect memory-backend-file, memory-backend-memfd, or
> +        memory-backend-epc objects.
> +
> +        Some migration modes require anon-alloc=memfd.
> +
>      ``memory-backend='id'``
>          An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
>          Allows to use a memory backend as main RAM.
> diff --git a/system/memory.c b/system/memory.c
> index 2d69521..28a837d 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -1552,8 +1552,10 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
>                                        uint64_t size,
>                                        Error **errp)
>  {
> +    uint32_t flags = (current_machine->anon_alloc == 
> ANON_ALLOC_OPTION_MEMFD) ?
> +                     RAM_SHARED : 0;
>      return memory_region_init_ram_flags_nomigrate(mr, owner, name,
> -                                                  size, 0, errp);
> +                                                  size, flags, errp);
>  }
>  
>  bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
> @@ -1713,8 +1715,10 @@ bool memory_region_init_rom_nomigrate(MemoryRegion *mr,
>                                        uint64_t size,
>                                        Error **errp)
>  {
> +    uint32_t flags = (current_machine->anon_alloc == 
> ANON_ALLOC_OPTION_MEMFD) ?
> +                     RAM_SHARED : 0;
>      if (!memory_region_init_ram_flags_nomigrate(mr, owner, name,
> -                                                size, 0, errp)) {
> +                                                size, flags, errp)) {
>           return false;
>      }
>      mr->readonly = true;
> @@ -1731,6 +1735,8 @@ bool 
> memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
>                                               Error **errp)
>  {
>      Error *err = NULL;
> +    uint32_t flags = (current_machine->anon_alloc == 
> ANON_ALLOC_OPTION_MEMFD) ?
> +                     RAM_SHARED : 0;
>      assert(ops);
>      memory_region_init(mr, owner, name, size);
>      mr->ops = ops;
> @@ -1738,7 +1744,7 @@ bool 
> memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
>      mr->terminates = true;
>      mr->rom_device = true;
>      mr->destructor = memory_region_destructor_ram;
> -    mr->ram_block = qemu_ram_alloc(size, 0, mr, &err);
> +    mr->ram_block = qemu_ram_alloc(size, flags, mr, &err);
>      if (err) {
>          mr->size = int128_zero();
>          object_unparent(OBJECT(mr));
> diff --git a/system/physmem.c b/system/physmem.c
> index 33d09f7..efe95ff 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -47,6 +47,7 @@
>  #include "qemu/qemu-print.h"
>  #include "qemu/log.h"
>  #include "qemu/memalign.h"
> +#include "qemu/memfd.h"
>  #include "exec/memory.h"
>  #include "exec/ioport.h"
>  #include "sysemu/dma.h"
> @@ -54,6 +55,7 @@
>  #include "sysemu/hw_accel.h"
>  #include "sysemu/xen-mapcache.h"
>  #include "trace/trace-root.h"
> +#include "trace.h"
>  
>  #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>  #include <linux/falloc.h>
> @@ -69,6 +71,8 @@
>  
>  #include "qemu/pmem.h"
>  
> +#include "qapi/qapi-types-migration.h"
> +#include "migration/options.h"
>  #include "migration/vmstate.h"
>  
>  #include "qemu/range.h"
> @@ -1828,6 +1832,32 @@ static void ram_block_add(RAMBlock *new_block, Error 
> **errp)
>                  qemu_mutex_unlock_ramlist();
>                  return;
>              }
> +
> +        } else if (new_block->flags & RAM_SHARED) {
> +            size_t max_length = new_block->max_length;
> +            MemoryRegion *mr = new_block->mr;
> +            const char *name = memory_region_name(mr);
> +
> +            new_block->mr->align = QEMU_VMALLOC_ALIGN;
> +
> +            if (new_block->fd == -1) {
> +                new_block->fd = qemu_memfd_create(name, max_length + 
> mr->align,
> +                                                  0, 0, 0, errp);
> +            }
> +
> +            if (new_block->fd >= 0) {
> +                int mfd = new_block->fd;
> +                qemu_set_cloexec(mfd);
> +                new_block->host = file_ram_alloc(new_block, max_length, mfd,
> +                                                 false, 0, errp);
> +            }
> +            if (!new_block->host) {
> +                qemu_mutex_unlock_ramlist();
> +                return;
> +            }
> +            memory_try_enable_merging(new_block->host, 
> new_block->max_length);
> +            free_on_error = true;
> +
>          } else {
>              new_block->host = qemu_anon_ram_alloc(new_block->max_length,
>                                                    &new_block->mr->align,
> @@ -1911,6 +1941,9 @@ static void ram_block_add(RAMBlock *new_block, Error 
> **errp)
>          ram_block_notify_add(new_block->host, new_block->used_length,
>                               new_block->max_length);
>      }
> +    trace_ram_block_add(memory_region_name(new_block->mr), new_block->flags,
> +                        new_block->fd, new_block->used_length,
> +                        new_block->max_length);
>      return;
>  
>  out_free:
> @@ -2097,8 +2130,11 @@ RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, 
> ram_addr_t maxsz,
>                                                       void *host),
>                                       MemoryRegion *mr, Error **errp)
>  {
> +    uint32_t flags = (current_machine->anon_alloc == 
> ANON_ALLOC_OPTION_MEMFD) ?
> +                     RAM_SHARED : 0;
> +    flags |= RAM_RESIZEABLE;
>      return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
> -                                   RAM_RESIZEABLE, mr, errp);
> +                                   flags, mr, errp);
>  }
>  
>  static void reclaim_ramblock(RAMBlock *block)
> diff --git a/system/trace-events b/system/trace-events
> index 69c9044..f8ebf42 100644
> --- a/system/trace-events
> +++ b/system/trace-events
> @@ -38,3 +38,6 @@ dirtylimit_state_finalize(void)
>  dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) 
> "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
>  dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page 
> rate limit %"PRIu64
>  dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep 
> %"PRIi64 " us"
> +
> +#physmem.c
> +ram_block_add(const char *name, uint32_t flags, int fd, size_t used_length, 
> size_t max_length) "%s, flags %u, fd %d, len %lu, maxlen %lu"




reply via email to

[Prev in Thread] Current Thread [Next in Thread]