In Linux, SPM (Specific Purpose Memory) is claimed by the hmem-dax driver by default. With
this patch I can use the following config to pass the SPM memory to the guest VM:
-object
memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on
I was thinking of changing the option name from "hmem" to "spm" to avoid
confusion.
Thanks,
Zhigang
-----Original Message-----
From: David Hildenbrand <david@redhat.com>
Sent: Friday, December 6, 2024 5:08 AM
To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>
Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
On 04.12.24 18:11, Zhigang Luo wrote:
This boolean option 'hmem' allows users to set a memory region from
memory-backend-file as heterogeneous memory. If 'hmem=on', QEMU will
set the flag RAM_HMEM in the RAM block of the corresponding memory
region and set the e820 type to E820_SOFT_RESERVED for this region.
Hi,
./scripts/get_maintainer.pl is your friend to figure out whom to CC on patches.
In general: not a fan. You seem to be abusing memory backend properties
+ RAM flags to merely modify how memory is going to be exposed in the
memory map on x86.
It's not even clear why heterogeneous memory should be exposed like
that, and how reasonable it is to essentially expose all of guest RAM as
E820_SOFT_RESERVED.
Note that the whole "pmem=on" case was very different, because it
required mmap() modifications.
Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
---
backends/hostmem-file.c | 23 +++++++++++++++++++++++
hw/i386/e820_memory_layout.h | 1 +
hw/i386/pc.c | 16 ++++++++++++++++
include/exec/cpu-common.h | 1 +
include/exec/memory.h | 3 +++
qapi/qom.json | 4 ++++
system/physmem.c | 7 ++++++-
7 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 7e5072e33e..5ddfdbaf86 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -32,6 +32,7 @@ struct HostMemoryBackendFile {
uint64_t offset;
bool discard_data;
bool is_pmem;
+ bool is_hmem;
bool readonly;
OnOffAuto rom;
};
@@ -88,6 +89,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error
**errp)
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
+ ram_flags |= fb->is_hmem ? RAM_HMEM : 0;
ram_flags |= RAM_NAMED_FILE;
return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
name,
backend->size, fb->align,
ram_flags,
@@ -256,6 +258,25 @@ static void file_memory_backend_set_rom(Object *obj,
Visitor *v,
visit_type_OnOffAuto(v, name, &fb->rom, errp);
}
+static bool file_memory_backend_get_hmem(Object *o, Error **errp)
+{
+ return MEMORY_BACKEND_FILE(o)->is_hmem;
+}
+
+static void file_memory_backend_set_hmem(Object *o, bool value, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+ if (host_memory_backend_mr_inited(backend)) {
+ error_setg(errp, "cannot change property 'hmem' of %s.",
+ object_get_typename(o));
+ return;
+ }
+
+ fb->is_hmem = value;
+}
+
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -295,6 +316,8 @@ file_backend_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, "pmem",
file_memory_backend_get_pmem, file_memory_backend_set_pmem);
#endif
+ object_class_property_add_bool(oc, "hmem",
+ file_memory_backend_get_hmem, file_memory_backend_set_hmem);
object_class_property_add_bool(oc, "readonly",
file_memory_backend_get_readonly,
file_memory_backend_set_readonly);
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index b50acfa201..8af6a9cfac 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -15,6 +15,7 @@
#define E820_ACPI 3
#define E820_NVS 4
#define E820_UNUSABLE 5
+#define E820_SOFT_RESERVED 0xEFFFFFFF
struct e820_entry {
uint64_t address;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 317aaca25a..41e9cc276c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -785,6 +785,21 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms,
uint64_t pci_hole64_size)
return pc_above_4g_end(pcms) - 1;
}
+static int pc_update_hmem_memory(RAMBlock *rb, void *opaque)
+{
+ X86MachineState *x86ms = opaque;
+ ram_addr_t offset;
+ ram_addr_t length;
+
+ if (qemu_ram_is_hmem(rb)) {
+ offset = qemu_ram_get_offset(rb) + (0x100000000ULL -
x86ms->below_4g_mem_size);
+ length = qemu_ram_get_used_length(rb);
+ e820_add_entry(offset, length, E820_SOFT_RESERVED);
+ }
I am pretty sure this will break in NUMA setups, where we have multiple
memory backends mapped in different locations.
The whole "(0x100000000ULL - x86ms->below_4g_mem_size)" looks hacky.
--
Cheers,
David / dhildenb