[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH V2 01/11] machine: alloc-anon option
From: |
Igor Mammedov |
Subject: |
Re: [PATCH V2 01/11] machine: alloc-anon option |
Date: |
Tue, 16 Jul 2024 11:19:55 +0200 |
On Sun, 30 Jun 2024 12:40:24 -0700
Steve Sistare <steven.sistare@oracle.com> wrote:
> Allocate anonymous memory using mmap MAP_ANON or memfd_create depending
> on the value of the anon-alloc machine property. This affects
> memory-backend-ram objects, guest RAM created with the global -m option
> but without an associated memory-backend object and without the -mem-path
> option
nowadays, all machines were converted to use memory backend for VM RAM.
so -m option implicitly creates memory-backend object,
which will be either MEMORY_BACKEND_FILE if -mem-path present
or MEMORY_BACKEND_RAM otherwise.
> To access the same memory in the old and new QEMU processes, the memory
> must be mapped shared. Therefore, the implementation always sets
> RAM_SHARED if alloc-anon=memfd, except for memory-backend-ram, where the
> user must explicitly specify the share option. In lieu of defining a new
so statement at the top that memory-backend-ram is affected is not
really valid?
> RAM flag, at the lowest level the implementation uses RAM_SHARED with fd=-1
> as the condition for calling memfd_create.
In general I do dislike adding yet another option that will affect
guest RAM allocation (memory-backends should be sufficient).
However I do see that you need memfd for device memory (vram, roms, ...).
Can we just use memfd/shared unconditionally for those and
avoid introducing a new confusing option?
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
> hw/core/machine.c | 24 ++++++++++++++++++++++++
> include/hw/boards.h | 1 +
> qapi/machine.json | 14 ++++++++++++++
> qemu-options.hx | 13 +++++++++++++
> system/memory.c | 12 +++++++++---
> system/physmem.c | 38 +++++++++++++++++++++++++++++++++++++-
> system/trace-events | 3 +++
> 7 files changed, 101 insertions(+), 4 deletions(-)
>
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 655d75c..7ca2ad0 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -454,6 +454,20 @@ static void machine_set_mem_merge(Object *obj, bool
> value, Error **errp)
> ms->mem_merge = value;
> }
>
> +static int machine_get_anon_alloc(Object *obj, Error **errp)
> +{
> + MachineState *ms = MACHINE(obj);
> +
> + return ms->anon_alloc;
> +}
> +
> +static void machine_set_anon_alloc(Object *obj, int value, Error **errp)
> +{
> + MachineState *ms = MACHINE(obj);
> +
> + ms->anon_alloc = value;
> +}
> +
> static bool machine_get_usb(Object *obj, Error **errp)
> {
> MachineState *ms = MACHINE(obj);
> @@ -1066,6 +1080,11 @@ static void machine_class_init(ObjectClass *oc, void
> *data)
> object_class_property_set_description(oc, "mem-merge",
> "Enable/disable memory merge support");
>
> + object_class_property_add_enum(oc, "anon-alloc", "AnonAllocOption",
> + &AnonAllocOption_lookup,
> + machine_get_anon_alloc,
> + machine_set_anon_alloc);
> +
> object_class_property_add_bool(oc, "usb",
> machine_get_usb, machine_set_usb);
> object_class_property_set_description(oc, "usb",
> @@ -1416,6 +1435,11 @@ static bool create_default_memdev(MachineState *ms,
> const char *path, Error **er
> if (!object_property_set_int(obj, "size", ms->ram_size, errp)) {
> goto out;
> }
> + if (!object_property_set_bool(obj, "share",
> + ms->anon_alloc == ANON_ALLOC_OPTION_MEMFD,
> + errp)) {
> + goto out;
> + }
> object_property_add_child(object_get_objects_root(), mc->default_ram_id,
> obj);
> /* Ensure backend's memory region name is equal to mc->default_ram_id */
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 73ad319..77f16ad 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -383,6 +383,7 @@ struct MachineState {
> bool enable_graphics;
> ConfidentialGuestSupport *cgs;
> HostMemoryBackend *memdev;
> + AnonAllocOption anon_alloc;
> /*
> * convenience alias to ram_memdev_id backend memory region
> * or to numa container memory region
> diff --git a/qapi/machine.json b/qapi/machine.json
> index 2fd3e9c..9173953 100644
> --- a/qapi/machine.json
> +++ b/qapi/machine.json
> @@ -1881,3 +1881,17 @@
> { 'command': 'x-query-interrupt-controllers',
> 'returns': 'HumanReadableText',
> 'features': [ 'unstable' ]}
> +
> +##
> +# @AnonAllocOption:
> +#
> +# An enumeration of the options for allocating anonymous guest memory.
> +#
> +# @mmap: allocate using mmap MAP_ANON
> +#
> +# @memfd: allocate using memfd_create
> +#
> +# Since: 9.1
> +##
> +{ 'enum': 'AnonAllocOption',
> + 'data': [ 'mmap', 'memfd' ] }
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 8ca7f34..595b693 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
> " nvdimm=on|off controls NVDIMM support (default=off)\n"
> " memory-encryption=@var{} memory encryption object to
> use (default=none)\n"
> " hmat=on|off controls ACPI HMAT support (default=off)\n"
> + " anon-alloc=mmap|memfd allocate anonymous guest RAM
> using mmap MAP_ANON or memfd_create (default: mmap)\n"
> " memory-backend='backend-id' specifies explicitly
> provided backend for main RAM (default=none)\n"
> "
> cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
> QEMU_ARCH_ALL)
> @@ -101,6 +102,18 @@ SRST
> Enables or disables ACPI Heterogeneous Memory Attribute Table
> (HMAT) support. The default is off.
>
> + ``anon-alloc=mmap|memfd``
> + Allocate anonymous guest RAM using mmap MAP_ANON (the default)
> + or memfd_create. This affects memory-backend-ram objects,
> + RAM created with the global -m option but without an
> + associated memory-backend object and without the -mem-path
> + option, and various memory regions such as ROMs that are
> + allocated when devices are created. This option does not
> + affect memory-backend-file, memory-backend-memfd, or
> + memory-backend-epc objects.
> +
> + Some migration modes require anon-alloc=memfd.
> +
> ``memory-backend='id'``
> An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
> Allows to use a memory backend as main RAM.
> diff --git a/system/memory.c b/system/memory.c
> index 2d69521..28a837d 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -1552,8 +1552,10 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
> uint64_t size,
> Error **errp)
> {
> + uint32_t flags = (current_machine->anon_alloc ==
> ANON_ALLOC_OPTION_MEMFD) ?
> + RAM_SHARED : 0;
> return memory_region_init_ram_flags_nomigrate(mr, owner, name,
> - size, 0, errp);
> + size, flags, errp);
> }
>
> bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
> @@ -1713,8 +1715,10 @@ bool memory_region_init_rom_nomigrate(MemoryRegion *mr,
> uint64_t size,
> Error **errp)
> {
> + uint32_t flags = (current_machine->anon_alloc ==
> ANON_ALLOC_OPTION_MEMFD) ?
> + RAM_SHARED : 0;
> if (!memory_region_init_ram_flags_nomigrate(mr, owner, name,
> - size, 0, errp)) {
> + size, flags, errp)) {
> return false;
> }
> mr->readonly = true;
> @@ -1731,6 +1735,8 @@ bool
> memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
> Error **errp)
> {
> Error *err = NULL;
> + uint32_t flags = (current_machine->anon_alloc ==
> ANON_ALLOC_OPTION_MEMFD) ?
> + RAM_SHARED : 0;
> assert(ops);
> memory_region_init(mr, owner, name, size);
> mr->ops = ops;
> @@ -1738,7 +1744,7 @@ bool
> memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
> mr->terminates = true;
> mr->rom_device = true;
> mr->destructor = memory_region_destructor_ram;
> - mr->ram_block = qemu_ram_alloc(size, 0, mr, &err);
> + mr->ram_block = qemu_ram_alloc(size, flags, mr, &err);
> if (err) {
> mr->size = int128_zero();
> object_unparent(OBJECT(mr));
> diff --git a/system/physmem.c b/system/physmem.c
> index 33d09f7..efe95ff 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -47,6 +47,7 @@
> #include "qemu/qemu-print.h"
> #include "qemu/log.h"
> #include "qemu/memalign.h"
> +#include "qemu/memfd.h"
> #include "exec/memory.h"
> #include "exec/ioport.h"
> #include "sysemu/dma.h"
> @@ -54,6 +55,7 @@
> #include "sysemu/hw_accel.h"
> #include "sysemu/xen-mapcache.h"
> #include "trace/trace-root.h"
> +#include "trace.h"
>
> #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> #include <linux/falloc.h>
> @@ -69,6 +71,8 @@
>
> #include "qemu/pmem.h"
>
> +#include "qapi/qapi-types-migration.h"
> +#include "migration/options.h"
> #include "migration/vmstate.h"
>
> #include "qemu/range.h"
> @@ -1828,6 +1832,32 @@ static void ram_block_add(RAMBlock *new_block, Error
> **errp)
> qemu_mutex_unlock_ramlist();
> return;
> }
> +
> + } else if (new_block->flags & RAM_SHARED) {
> + size_t max_length = new_block->max_length;
> + MemoryRegion *mr = new_block->mr;
> + const char *name = memory_region_name(mr);
> +
> + new_block->mr->align = QEMU_VMALLOC_ALIGN;
> +
> + if (new_block->fd == -1) {
> + new_block->fd = qemu_memfd_create(name, max_length +
> mr->align,
> + 0, 0, 0, errp);
> + }
> +
> + if (new_block->fd >= 0) {
> + int mfd = new_block->fd;
> + qemu_set_cloexec(mfd);
> + new_block->host = file_ram_alloc(new_block, max_length, mfd,
> + false, 0, errp);
> + }
> + if (!new_block->host) {
> + qemu_mutex_unlock_ramlist();
> + return;
> + }
> + memory_try_enable_merging(new_block->host,
> new_block->max_length);
> + free_on_error = true;
> +
> } else {
> new_block->host = qemu_anon_ram_alloc(new_block->max_length,
> &new_block->mr->align,
> @@ -1911,6 +1941,9 @@ static void ram_block_add(RAMBlock *new_block, Error
> **errp)
> ram_block_notify_add(new_block->host, new_block->used_length,
> new_block->max_length);
> }
> + trace_ram_block_add(memory_region_name(new_block->mr), new_block->flags,
> + new_block->fd, new_block->used_length,
> + new_block->max_length);
> return;
>
> out_free:
> @@ -2097,8 +2130,11 @@ RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size,
> ram_addr_t maxsz,
> void *host),
> MemoryRegion *mr, Error **errp)
> {
> + uint32_t flags = (current_machine->anon_alloc ==
> ANON_ALLOC_OPTION_MEMFD) ?
> + RAM_SHARED : 0;
> + flags |= RAM_RESIZEABLE;
> return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
> - RAM_RESIZEABLE, mr, errp);
> + flags, mr, errp);
> }
>
> static void reclaim_ramblock(RAMBlock *block)
> diff --git a/system/trace-events b/system/trace-events
> index 69c9044..f8ebf42 100644
> --- a/system/trace-events
> +++ b/system/trace-events
> @@ -38,3 +38,6 @@ dirtylimit_state_finalize(void)
> dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us)
> "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
> dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page
> rate limit %"PRIu64
> dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep
> %"PRIi64 " us"
> +
> +#physmem.c
> +ram_block_add(const char *name, uint32_t flags, int fd, size_t used_length,
> size_t max_length) "%s, flags %u, fd %d, len %lu, maxlen %lu"