On 07.11.24 11:21, “William Roche wrote:
From: William Roche <william.roche@oracle.com>
When a memory page is added to the hwpoison_page_list, include
the page size information. This size is the backend real page
size. To better deal with hugepages, we create a single entry
for the entire page.
Signed-off-by: William Roche <william.roche@oracle.com>
---
accel/kvm/kvm-all.c | 8 +++++++-
include/exec/cpu-common.h | 1 +
system/physmem.c | 13 +++++++++++++
3 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 801cff16a5..6dd06f5edf 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1266,6 +1266,7 @@ int kvm_vm_check_extension(KVMState *s, unsigned
int extension)
*/
typedef struct HWPoisonPage {
ram_addr_t ram_addr;
+ size_t page_size;
QLIST_ENTRY(HWPoisonPage) list;
} HWPoisonPage;
@@ -1278,7 +1279,7 @@ static void kvm_unpoison_all(void *param)
QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
QLIST_REMOVE(page, list);
- qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
+ qemu_ram_remap(page->ram_addr, page->page_size);
g_free(page);
I'm curious, can't we simply drop the size parameter from qemu_ram_remap()
completely and determine the page size internally from the RAMBlock that
we are looking up already?
This way, we avoid yet another lookup in qemu_ram_pagesize_from_addr(),
and can just handle it completely in qemu_ram_remap().
In particular, to be future proof, we should also align the offset down to
the pagesize.
I'm thinking about something like this:
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 801cff16a5..8a47aa7258 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1278,7 +1278,7 @@ static void kvm_unpoison_all(void *param)
QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
QLIST_REMOVE(page, list);
- qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
+ qemu_ram_remap(page->ram_addr);
g_free(page);
}
}
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 638dc806a5..50a829d31f 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t;
/* memory API */
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void qemu_ram_remap(ram_addr_t addr);
/* This should not be used by devices. */
ram_addr_t qemu_ram_addr_from_host(void *ptr);
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
diff --git a/system/physmem.c b/system/physmem.c
index dc1db3a384..5f19bec089 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2167,10 +2167,10 @@ void qemu_ram_free(RAMBlock *block)
}
#ifndef _WIN32
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+void qemu_ram_remap(ram_addr_t addr)
{
RAMBlock *block;
- ram_addr_t offset;
+ ram_addr_t offset, length;
int flags;
void *area, *vaddr;
int prot;
@@ -2178,6 +2178,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t
length)
RAMBLOCK_FOREACH(block) {
offset = addr - block->offset;
if (offset < block->max_length) {
+ /* Respect the pagesize of our RAMBlock. */
+ offset = QEMU_ALIGN_DOWN(offset, qemu_ram_pagesize(block));
+ length = qemu_ram_pagesize(block);
+
vaddr = ramblock_ptr(block, offset);
if (block->flags & RAM_PREALLOC) {
;
@@ -2206,6 +2210,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t
length)
memory_try_enable_merging(vaddr, length);
qemu_ram_setup_dump(vaddr, length);
}
+
+ break;
}
}
}