qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 00/10] target/i386/tcg: fixes for seg_helper.c


From: Paolo Bonzini
Subject: Re: [PATCH 00/10] target/i386/tcg: fixes for seg_helper.c
Date: Wed, 10 Jul 2024 23:08:28 +0200



Il mer 10 lug 2024, 23:01 Robert Henry <rrh.henry@gmail.com> ha scritto:
I have only skimmed the diffs.  Your knowledge of the deep semantics, gained by close differential reading of intel and amd docs, is truly amazing.  Many thanks for pushing this through!

Thanks for bringing this to our attention too, apart from the practical bug hopefully it will help future readers to have a more precise implementation.

I tried to acknowledge your contribution in the commit messages.

I have 2 nits, perhaps stylistic only.

For code like "sp -= 2" or "sp += 2" followed or preceded by a write to the stack pointer of a uint16_t variable 'x',  would it be better/more robust to rewrite as: "sp -= sizeof(x)"  ?

I think that's intentional because the value subtracted is related to the "stw" or "stl" in the store (likewise for incrementing after a load) more than to the size of x.

There are a lot of masks constructed using -1.  I think it would be clearer to use 0xffffffff (for 32-bit masks) as that reminds the reader that this is a bit mask.  But it seems that using -1 is how the original code was written.

-1 is used for 64-bit masks only. They get unwieldy quickly. :)

Paolo 


On Tue, Jul 9, 2024 at 11:29 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
This includes bugfixes:
- allowing IRET from user mode to user mode with SMAP (do not use implicit
  kernel accesses, which break if the stack is in userspace)

- use DPL-level accesses for interrupts and call gates

- various fixes for task switching

And two related cleanups: computing MMU index once for far calls and returns
(including task switches), and using X86Access for TSS access.

Tested with a really ugly patch to kvm-unit-tests, included after signature.

Paolo Bonzini (7):
  target/i386/tcg: Allow IRET from user mode to user mode with SMAP
  target/i386/tcg: use PUSHL/PUSHW for error code
  target/i386/tcg: Compute MMU index once
  target/i386/tcg: Use DPL-level accesses for interrupts and call gates
  target/i386/tcg: check for correct busy state before switching to a
    new task
  target/i386/tcg: use X86Access for TSS access
  target/i386/tcg: save current task state before loading new one

Richard Henderson (3):
  target/i386/tcg: Remove SEG_ADDL
  target/i386/tcg: Reorg push/pop within seg_helper.c
  target/i386/tcg: Introduce x86_mmu_index_{kernel_,}pl

 target/i386/cpu.h            |  11 +-
 target/i386/cpu.c            |  27 +-
 target/i386/tcg/seg_helper.c | 606 +++++++++++++++++++----------------
 3 files changed, 354 insertions(+), 290 deletions(-)

--
2.45.2

diff --git a/lib/x86/usermode.c b/lib/x86/usermode.c
index c3ec0ad7..0bf40c6d 100644
--- a/lib/x86/usermode.c
+++ b/lib/x86/usermode.c
@@ -5,13 +5,15 @@
 #include "x86/desc.h"
 #include "x86/isr.h"
 #include "alloc.h"
+#include "alloc_page.h"
 #include "setjmp.h"
 #include "usermode.h"

 #include "libcflat.h"
 #include <stdint.h>

-#define USERMODE_STACK_SIZE    0x2000
+#define USERMODE_STACK_ORDER   1 /* 8k */
+#define USERMODE_STACK_SIZE    (1 << (12 + USERMODE_STACK_ORDER))
 #define RET_TO_KERNEL_IRQ      0x20

 static jmp_buf jmpbuf;
@@ -37,9 +39,14 @@ uint64_t run_in_user(usermode_func func, unsigned int fault_vector,
 {
        extern char ret_to_kernel;
        volatile uint64_t rax = 0;
-       static unsigned char user_stack[USERMODE_STACK_SIZE];
+       static unsigned char *user_stack;
        handler old_ex;

+       if (!user_stack) {
+               user_stack = alloc_pages(USERMODE_STACK_ORDER);
+               printf("%p\n", user_stack);
+       }
+
        *raised_vector = 0;
        set_idt_entry(RET_TO_KERNEL_IRQ, &ret_to_kernel, 3);
        old_ex = handle_exception(fault_vector,
@@ -51,6 +58,8 @@ uint64_t run_in_user(usermode_func func, unsigned int fault_vector,
                return 0;
        }

+       memcpy(user_stack + USERMODE_STACK_SIZE - 8, &func, 8);
+
        asm volatile (
                        /* Prepare kernel SP for exception handlers */
                        "mov %%rsp, %[rsp0]\n\t"
@@ -63,12 +72,13 @@ uint64_t run_in_user(usermode_func func, unsigned int fault_vector,
                        "pushq %[user_stack_top]\n\t"
                        "pushfq\n\t"
                        "pushq %[user_cs]\n\t"
-                       "lea user_mode(%%rip), %%rax\n\t"
+                       "lea user_mode+0x800000(%%rip), %%rax\n\t" // smap.flat places usermode addresses at 8MB-16MB
                        "pushq %%rax\n\t"
                        "iretq\n"

                        "user_mode:\n\t"
                        /* Back up volatile registers before invoking func */
+                       "pop %%rax\n\t"
                        "push %%rcx\n\t"
                        "push %%rdx\n\t"
                        "push %%rdi\n\t"
@@ -78,11 +88,12 @@ uint64_t run_in_user(usermode_func func, unsigned int fault_vector,
                        "push %%r10\n\t"
                        "push %%r11\n\t"
                        /* Call user mode function */
+                       "add $0x800000,%%rbp\n\t"
                        "mov %[arg1], %%rdi\n\t"
                        "mov %[arg2], %%rsi\n\t"
                        "mov %[arg3], %%rdx\n\t"
                        "mov %[arg4], %%rcx\n\t"
-                       "call *%[func]\n\t"
+                       "call *%%rax\n\t"
                        /* Restore registers */
                        "pop %%r11\n\t"
                        "pop %%r10\n\t"
@@ -112,12 +123,11 @@ uint64_t run_in_user(usermode_func func, unsigned int fault_vector,
                        [arg2]"m"(arg2),
                        [arg3]"m"(arg3),
                        [arg4]"m"(arg4),
-                       [func]"m"(func),
                        [user_ds]"i"(USER_DS),
                        [user_cs]"i"(USER_CS),
                        [kernel_ds]"rm"(KERNEL_DS),
                        [user_stack_top]"r"(user_stack +
-                                       sizeof(user_stack)),
+                                       USERMODE_STACK_SIZE - 8),
                        [kernel_entry_vector]"i"(RET_TO_KERNEL_IRQ));

        handle_exception(fault_vector, old_ex);
diff --git a/x86/smap.c b/x86/smap.c
index 9a823a55..65119442 100644
--- a/x86/smap.c
+++ b/x86/smap.c
@@ -2,6 +2,7 @@
 #include <alloc_page.h>
 #include "x86/desc.h"
 #include "x86/processor.h"
+#include "x86/usermode.h"
 #include "x86/vm.h"

 volatile int pf_count = 0;
@@ -89,6 +90,31 @@ static void check_smap_nowp(void)
        write_cr3(read_cr3());
 }

+#ifdef __x86_64__
+static void iret(void)
+{
+       asm volatile(
+           "mov %%rsp, %%rcx;"
+           "movl %%ss, %%ebx; pushq %%rbx; pushq %%rcx;"
+           "pushf;"
+           "movl %%cs, %%ebx; pushq %%rbx; "
+           "lea 1f(%%rip), %%rbx; pushq %%rbx; iretq; 1:"
+
+               : : : "ebx", "ecx", "cc"); /* RPL=0 */
+}
+
+static void test_user_iret(void)
+{
+       bool raised_vector;
+       uintptr_t user_iret = (uintptr_t)iret + USER_BASE;
+
+       run_in_user((usermode_func)user_iret, PF_VECTOR, 0, 0, 0, 0,
+                   &raised_vector);
+
+       report(!raised_vector, "No #PF on CPL=3 DPL=3 iret");
+}
+#endif
+
 int main(int ac, char **av)
 {
        unsigned long i;
@@ -196,7 +222,9 @@ int main(int ac, char **av)

        check_smap_nowp();

-       // TODO: implicit kernel access from ring 3 (e.g. int)
+#ifdef __x86_64__
+       test_user_iret();
+#endif

        return report_summary();
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]