bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 4/5] x86_64: refactor segment register handling


From: Luca Dariz
Subject: [PATCH 4/5] x86_64: refactor segment register handling
Date: Sat, 29 Jul 2023 19:47:52 +0200

The actual values are not saved together with the rest of the thread
state, both because it would be quite espensive (reading MSR, unless
rdfsbase instructions are supported, but that's optional) and not
really needed. The only way the user has to change its value is with a
specific RPC, so we can intercept the change easily.  Furthermore,
Leaving the values there exposes them to being corrupted in case of a
double interruption, e.g. an irq is handled just before iretq but
after starting to restore the thread state.  This solution was
suggested by Sergey Bugaev.

* i386/i386/db_trace.c: remove fsbase/gsbase from the registers
  available
* i386/i386/debug_i386.c: remove fsbase/gsbase from the printed thread
  state
* i386/i386/i386asm.sym: remove fsbase/gsbase as it's not needed in
  asm anymore
* i386/i386/pcb.c: point fsbase/gsbase to the new location
* i386/i386/thread.h: move fsbase/gsbase to the machine state
* x86_64/locore.S: generalize segment-handling including es/ds/gs/fs
  and remove fsbase/gsbase handling. Also, factor out kernel segment
  selector setting to a macro.
---
 i386/i386/db_trace.c   |   2 -
 i386/i386/debug_i386.c |   2 -
 i386/i386/i386asm.sym  |   2 -
 i386/i386/pcb.c        |  12 +--
 i386/i386/thread.h     |  22 +++-
 x86_64/locore.S        | 228 +++++++++++++++--------------------------
 6 files changed, 108 insertions(+), 160 deletions(-)

diff --git a/i386/i386/db_trace.c b/i386/i386/db_trace.c
index 2b6ad741..8bd86fa5 100644
--- a/i386/i386/db_trace.c
+++ b/i386/i386/db_trace.c
@@ -78,8 +78,6 @@ struct db_variable db_regs[] = {
        { "r13",(long *)&ddb_regs.r13, db_i386_reg_value },
        { "r14",(long *)&ddb_regs.r14, db_i386_reg_value },
        { "r15",(long *)&ddb_regs.r15, db_i386_reg_value },
-       { "fsb",(long *)&ddb_regs.fsbase,db_i386_reg_value },
-       { "gsb",(long *)&ddb_regs.gsbase,db_i386_reg_value },
 #endif
 };
 struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c
index b5465796..41d032e3 100644
--- a/i386/i386/debug_i386.c
+++ b/i386/i386/debug_i386.c
@@ -40,8 +40,6 @@ void dump_ss(const struct i386_saved_state *st)
                st->r8, st->r9, st->r10, st->r11);
        printf("R12 %016lx R13 %016lx R14 %016lx R15 %016lx\n",
                st->r12, st->r13, st->r14, st->r15);
-       printf("FSBASE %016lx GSBASE %016lx\n",
-               st->fsbase, st->gsbase);
        printf("RIP %016lx EFLAGS %08lx\n", st->eip, st->efl);
 #else
        printf("EAX %08lx EBX %08lx ECX %08lx EDX %08lx\n",
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index fd0be557..1b9b40bb 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -108,8 +108,6 @@ offset      i386_saved_state        r       r12
 offset i386_saved_state        r       r13
 offset i386_saved_state        r       r14
 offset i386_saved_state        r       r15
-offset i386_saved_state        r       fsbase
-offset i386_saved_state        r       gsbase
 #endif
 
 offset i386_interrupt_state    i       eip
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index d1c5fb50..1cf87eb1 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -229,8 +229,8 @@ void switch_ktss(pcb_t pcb)
 #endif /* MACH_PV_DESCRIPTORS */
 
 #if defined(__x86_64__) && !defined(USER32)
-       wrmsr(MSR_REG_FSBASE, pcb->iss.fsbase);
-       wrmsr(MSR_REG_GSBASE, pcb->iss.gsbase);
+       wrmsr(MSR_REG_FSBASE, pcb->ims.sbs.fsbase);
+       wrmsr(MSR_REG_GSBASE, pcb->ims.sbs.gsbase);
 #endif
 
        db_load_context(pcb);
@@ -687,8 +687,8 @@ kern_return_t thread_setstatus(
                             return KERN_INVALID_ARGUMENT;
 
                     state = (struct i386_fsgs_base_state *) tstate;
-                    thread->pcb->iss.fsbase = state->fs_base;
-                    thread->pcb->iss.gsbase = state->gs_base;
+                    thread->pcb->ims.sbs.fsbase = state->fs_base;
+                    thread->pcb->ims.sbs.gsbase = state->gs_base;
                     if (thread == current_thread()) {
                             wrmsr(MSR_REG_FSBASE, state->fs_base);
                             wrmsr(MSR_REG_GSBASE, state->gs_base);
@@ -889,8 +889,8 @@ kern_return_t thread_getstatus(
                             return KERN_INVALID_ARGUMENT;
 
                     state = (struct i386_fsgs_base_state *) tstate;
-                    state->fs_base = thread->pcb->iss.fsbase;
-                    state->gs_base = thread->pcb->iss.gsbase;
+                    state->fs_base = thread->pcb->ims.sbs.fsbase;
+                    state->gs_base = thread->pcb->ims.sbs.gsbase;
                     *count = i386_FSGS_BASE_STATE_COUNT;
                     break;
             }
diff --git a/i386/i386/thread.h b/i386/i386/thread.h
index 2378154f..86a44098 100644
--- a/i386/i386/thread.h
+++ b/i386/i386/thread.h
@@ -51,10 +51,6 @@
  */
 
 struct i386_saved_state {
-#ifdef __x86_64__
-       unsigned long   fsbase;
-       unsigned long   gsbase;
-#endif
        unsigned long   gs;
        unsigned long   fs;
        unsigned long   es;
@@ -162,6 +158,13 @@ struct v86_assist_state {
 #define        V86_IF_PENDING          0x8000  /* unused bit */
 #endif
 
+#if defined(__x86_64__) && !defined(USER32)
+struct i386_segment_base_state {
+       unsigned long fsbase;
+       unsigned long gsbase;
+};
+#endif
+
 /*
  *     i386_interrupt_state:
  *
@@ -206,14 +209,23 @@ struct i386_machine_state {
 #endif
        struct real_descriptor user_gdt[USER_GDT_SLOTS];
        struct i386_debug_state ids;
+#if defined(__x86_64__) && !defined(USER32)
+       struct i386_segment_base_state sbs;
+#endif
 };
 
 typedef struct pcb {
+       /* START of the exception stack.
+        * NOTE: this area is used as exception stack when switching
+        * CPL, and it MUST be big enough to save the thread state and
+        * switch to a proper stack area, even considering recursive
+        * exceptions, otherwise it could corrupt nearby memory */
        struct i386_interrupt_state iis[2];     /* interrupt and NMI */
 #ifdef __x86_64__
-       unsigned long pad;         /* ensure exception stack is aligned to 16 */
+       unsigned long pad;         /* ensure exception stack is aligned to 16 */
 #endif
        struct i386_saved_state iss;
+       /* END of exception stack*/
        struct i386_machine_state ims;
        decl_simple_lock_data(, lock)
        unsigned short init_control;            /* Initial FPU control to set */
diff --git a/x86_64/locore.S b/x86_64/locore.S
index 7127957b..66a9436a 100644
--- a/x86_64/locore.S
+++ b/x86_64/locore.S
@@ -39,6 +39,10 @@
 #include <i386/i386/cpu_number.h>
 #include <i386/i386/xen.h>
 
+
+/*
+ * Helpers for thread state as saved in the pcb area, during trap or irq 
handling
+ */
 #define pusha          \
        pushq %rax      ;\
        pushq %rcx      ;\
@@ -75,45 +79,74 @@
        popq %rcx       ;\
        popq %rax
 
+/*
+ * Note that we have to load the kernel segment registers even if this
+ * is a trap from the kernel, because the kernel uses user segment
+ * registers for copyin/copyout.
+ * (XXX Would it be smarter just to use fs or gs for that?)
+ */
 #ifdef USER32
-#define PUSH_FSGS              \
+#define PUSH_SEGMENTS(reg)     \
+       movq    %ds,reg         ;\
+       pushq   reg             ;\
+       movq    %es,reg         ;\
+       pushq   reg             ;\
        pushq   %fs             ;\
-       pushq   %gs             ;\
-       subq    $16,%rsp
+       pushq   %gs
 #else
-#define PUSH_FSGS              \
+#define PUSH_SEGMENTS(reg)     \
        subq    $32,%rsp
 #endif
 
 #ifdef USER32
-#define POP_FSGS               \
+#define POP_SEGMENTS(reg)      \
        popq    %gs             ;\
        popq    %fs             ;\
-       addq    $16,%rsp
+       popq    reg             ;\
+       movq    reg,%es         ;\
+       popq    reg             ;\
+       movq    reg,%ds
 #else
-#define POP_FSGS               \
+#define POP_SEGMENTS(reg)      \
        addq    $32,%rsp
 #endif
 
 #ifdef USER32
-#define PUSH_FSGS_ISR          \
+#define PUSH_SEGMENTS_ISR(reg) \
+       movq    %ds,reg         ;\
+       pushq   reg             ;\
+       movq    %es,reg         ;\
+       pushq   reg             ;\
        pushq   %fs             ;\
        pushq   %gs
 #else
-#define PUSH_FSGS_ISR          \
-       subq    $16,%rsp
+#define PUSH_SEGMENTS_ISR(reg) \
+       subq    $32,%rsp
 #endif
 
 #ifdef USER32
-#define POP_FSGS_ISR           \
+#define POP_SEGMENTS_ISR(reg)  \
        popq    %gs             ;\
-       popq    %fs
+       popq    %fs             ;\
+       popq    reg             ;\
+       movq    reg,%es         ;\
+       popq    reg             ;\
+       movq    reg,%ds
 #else
-#define POP_FSGS_ISR           \
-       addq    $16,%rsp
+#define POP_SEGMENTS_ISR(reg)  \
+       addq    $32,%rsp
 #endif
 
-
+#ifdef USER32
+#define SET_KERNEL_SEGMENTS           \
+       mov     %ss,%dx /* switch to kernel segments */ ;\
+       mov     %dx,%ds                 ;\
+       mov     %dx,%es                 ;\
+       mov     %dx,%fs                 ;\
+       mov     %dx,%gs
+#else
+#define SET_KERNEL_SEGMENTS
+#endif
 
 /*
  * Fault recovery.
@@ -350,32 +383,17 @@ ENTRY(start_timer)
 /*
  * Trap/interrupt entry points.
  *
- * All traps must create the following save area on the kernel stack:
- *
- *     gs
- *     fs
- *     es
- *     ds
- *     edi
- *     esi
- *     ebp
- *     cr2 if page fault - otherwise unused
- *     ebx
- *     edx
- *     ecx
- *     eax
- *     trap number
- *     error code
- *     eip
- *     cs
- *     eflags
- *     user rsp - if from user
- *     user ss  - if from user
- *     es       - if from V86 thread
- *     ds       - if from V86 thread
- *     fs       - if from V86 thread
- *     gs       - if from V86 thread
+ * All traps must create the i386_saved_state struct on the stack on
+ * entry. Note that:
+ *      - CR2 is only used if the trap is a page fault
+ *      - user_rsp/user_ss are only used if entering from user space
+ *      - v86_regs are used only from V86 threads
+ *         (TODO check if V86 is still used with USER32)
  *
+ * Depending the CPL before entry, the stack might be switched or not;
+ * if entering from user-space the CPU loads TSS->RSP0 in RSP,
+ * otherwise RSP is unchanged. After this, the cpu pushes
+ * SS/RSP/RFLAFS/CS/RIP and optionally ErrorCode and executes the handler.
  */
 
 /* Try to save/show some information when a double fault happens
@@ -426,16 +444,16 @@ trap_check_kernel_exit:
                                        /* check for the kernel exit sequence */
        cmpq    $_kret_iret,16(%rsp)    /* on IRET? */
        je      fault_iret
-#if 0
+#ifdef USER32
        cmpq    $_kret_popl_ds,16(%rsp) /* popping DS? */
        je      fault_popl_ds
        cmpq    $_kret_popl_es,16(%rsp) /* popping ES? */
        je      fault_popl_es
-#endif
        cmpq    $_kret_popl_fs,16(%rsp) /* popping FS? */
        je      fault_popl_fs
        cmpq    $_kret_popl_gs,16(%rsp) /* popping GS? */
        je      fault_popl_gs
+#endif
 take_fault:                            /* if none of the above: */
        jmp     EXT(alltraps)           /* treat as normal trap. */
 
@@ -464,6 +482,7 @@ fault_iret:
        popq    %rax                    /* restore eax */
        jmp     EXT(alltraps)           /* take fault */
 
+#ifdef USER32
 /*
  * Fault restoring a segment register.  The user's registers are still
  * saved on the stack.  The offending segment register has not been
@@ -499,6 +518,7 @@ push_gs:
 push_gsbase:
        pushq   $0
        pushq   $0
+#endif
 push_segregs:
        movq    %rax,R_TRAPNO(%rsp)     /* set trap number */
        movq    %rdx,R_ERR(%rsp)        /* set error code */
@@ -562,23 +582,8 @@ ENTRY(t_page_fault)
 ENTRY(alltraps)
        pusha                           /* save the general registers */
 trap_push_segs:
-       movq    %ds,%rax                        /* and the segment registers */
-       pushq   %rax
-       movq    %es,%rax                        /* and the segment registers */
-       pushq   %rax
-       PUSH_FSGS
-
-       /* Note that we have to load the segment registers
-          even if this is a trap from the kernel,
-          because the kernel uses user segment registers for copyin/copyout.
-          (XXX Would it be smarter just to use fs or gs for that?)  */
-       mov     %ss,%ax                 /* switch to kernel data segment */
-       mov     %ax,%ds                 /* (same as kernel stack segment) */
-       mov     %ax,%es
-#ifdef USER32
-       mov     %ax,%fs
-       mov     %ax,%gs
-#endif
+       PUSH_SEGMENTS(%rax)
+       SET_KERNEL_SEGMENTS
 trap_set_segs:
        cld                             /* clear direction flag */
 #ifdef USER32
@@ -634,23 +639,20 @@ _return_to_user:
  */
 
 _return_from_kernel:
-       addq    $16,%rsp                /* skip FS/GS base */
 #ifndef USER32
-_kret_popl_gs:
-_kret_popl_fs:
-       addq    $16,%rsp                /* skip FS/GS selector */
+       addq    $32,%rsp                /* skip FS/GS selector */
 #else
 _kret_popl_gs:
        popq    %gs                     /* restore segment registers */
 _kret_popl_fs:
        popq    %fs
-#endif
 _kret_popl_es:
        popq    %rax
        movq    %rax,%es
 _kret_popl_ds:
        popq    %rax
        movq    %rax,%ds
+#endif
        popa                            /* restore general registers */
        addq    $16,%rsp                /* discard trap number and error code */
 _kret_iret:
@@ -777,8 +779,11 @@ INTERRUPT(255)
 /* XXX handle NMI - at least print a warning like Linux does.  */
 
 /*
- * All interrupts enter here.
- * old %eax on stack; interrupt number in %eax.
+ * All interrupts enter here. The cpu might have loaded a new RSP,
+ * depending on the previous CPL, as in alltraps.
+ * Old %eax on stack, interrupt number in %eax; we need to fill the remaining
+ * fields of struct i386_interrupt_state, which might be in the pcb or in the
+ * interrupt stack.
  */
 ENTRY(all_intrs)
        pushq   %rcx                    /* save registers */
@@ -791,24 +796,15 @@ ENTRY(all_intrs)
        pushq   %r11
        cld                             /* clear direction flag */
 
-       movq    %ds,%rdx                        /* save segment registers */
-       pushq   %rdx
-       movq    %es,%rdx
-       pushq   %rdx
-       PUSH_FSGS_ISR
+       PUSH_SEGMENTS_ISR(%rdx)
 
        movq    %rsp,%rdx               /* on an interrupt stack? */
        and     $(~(INTSTACK_SIZE-1)),%rdx
        cmpq    %ss:EXT(int_stack_base),%rdx
        je      int_from_intstack       /* if not: */
 
-       mov     %ss,%dx                 /* switch to kernel segments */
-       mov     %dx,%ds
-       mov     %dx,%es
-#ifdef USER32
-       mov     %dx,%fs
-       mov     %dx,%gs
-#endif
+       SET_KERNEL_SEGMENTS
+
        CPU_NUMBER(%edx)
 
        movq    CX(EXT(int_stack_top),%edx),%rcx
@@ -849,11 +845,7 @@ LEXT(return_to_iret)                       /* ( label for 
kdb_kintr and hardclock) */
        cmpq    $0,CX(EXT(need_ast),%edx)
        jnz     ast_from_interrupt      /* take it if so */
 1:
-       POP_FSGS_ISR
-       pop     %rdx
-       mov     %rdx,%es
-       pop     %rdx
-       mov     %rdx,%ds
+       POP_SEGMENTS_ISR(%rdx)
        pop     %r11
        pop     %r10
        pop     %r9
@@ -871,12 +863,7 @@ int_from_intstack:
        jb      stack_overflowed        /* if not: */
        call    EXT(interrupt)          /* call interrupt routine */
 _return_to_iret_i:                     /* ( label for kdb_kintr) */
-       POP_FSGS_ISR
-       pop     %rdx
-       mov     %rdx,%es
-       pop     %rdx
-       mov     %rdx,%ds
-
+       POP_SEGMENTS_ISR(%rdx)
        pop     %r11
        pop     %r10
        pop     %r9
@@ -909,11 +896,7 @@ stack_overflowed:
  *     ss
  */
 ast_from_interrupt:
-       POP_FSGS_ISR
-       pop     %rdx
-       mov     %rdx,%es
-       pop     %rdx
-       mov     %rdx,%ds
+       POP_SEGMENTS_ISR(%rdx)
        popq    %r11
        popq    %r10
        popq    %r9
@@ -926,19 +909,8 @@ ast_from_interrupt:
        pushq   $0                      /* zero code */
        pushq   $0                      /* zero trap number */
        pusha                           /* save general registers */
-       mov     %ds,%rdx                /* save segment registers */
-       push    %rdx
-       mov     %es,%rdx
-       push    %rdx
-       PUSH_FSGS_ISR
-
-       mov     %ss,%dx                 /* switch to kernel segments */
-       mov     %dx,%ds
-       mov     %dx,%es
-#ifdef USER32
-       mov     %dx,%fs
-       mov     %dx,%gs
-#endif
+       PUSH_SEGMENTS_ISR(%rdx)
+       SET_KERNEL_SEGMENTS
        CPU_NUMBER(%edx)
        TIME_TRAP_UENTRY
 
@@ -1056,20 +1028,12 @@ kdb_from_iret_i:                        /* on interrupt 
stack */
        pushq   $0                      /* zero error code */
        pushq   $0                      /* zero trap number */
        pusha                           /* save general registers */
-       mov     %ds,%rdx                /* save segment registers */
-       push    %rdx
-       mov     %es,%rdx
-       push    %rdx
-       PUSH_FSGS
+       PUSH_SEGMENTS(%rdx)
        movq    %rsp,%rdx               /* pass regs, */
        movq    $0,%rsi                 /* code, */
        movq    $-1,%rdi                /* type to kdb */
        call    EXT(kdb_trap)
-       POP_FSGS
-       pop     %rdx
-       mov     %rdx,%es
-       pop     %rdx
-       mov     %rdx,%ds
+       POP_SEGMENTS(%rdx)
        popa                            /* restore general registers */
        addq    $16,%rsp
 
@@ -1144,23 +1108,13 @@ ttd_from_iret_i:                        /* on interrupt 
stack */
        pushq   $0                      /* zero error code */
        pushq   $0                      /* zero trap number */
        pusha                           /* save general registers */
-       mov     %ds,%rdx                /* save segment registers */
-       push    %rdx
-       mov     %es,%rdx
-       push    %rdx
-       push    %fs
-       push    %gs
+       PUSH_SEGMENTS_ISR(%rdx)
        ud2     // TEST it
        movq    %rsp,%rdx               /* pass regs, */
        movq    $0,%rsi                 /* code, */
        movq    $-1,%rdi                /* type to kdb */
        call    _kttd_trap
-       pop     %gs                     /* restore segment registers */
-       pop     %fs
-       pop     %rdx
-       mov     %rdx,%es
-       pop     %rdx
-       mov     %rdx,%ds
+       POP_SEGMENTS_ISR(%rdx)
        popa                            /* restore general registers */
        addq    $16,%rsp
 
@@ -1193,20 +1147,8 @@ syscall_entry_2:
        pushq   $0                      /* clear trap number slot */
 
        pusha                           /* save the general registers */
-       movq    %ds,%rdx                /* and the segment registers */
-       pushq   %rdx
-       movq    %es,%rdx
-       pushq   %rdx
-       pushq   %fs
-       pushq   %gs
-       pushq   $0      // gsbase
-       pushq   $0      // fsbase
-
-       mov     %ss,%dx                 /* switch to kernel data segment */
-       mov     %dx,%ds
-       mov     %dx,%es
-       mov     %dx,%fs
-       mov     %dx,%gs
+       PUSH_SEGMENTS(%rdx)
+       SET_KERNEL_SEGMENTS
 
 /*
  * Shuffle eflags,eip,cs into proper places
-- 
2.39.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]