bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[VERY RFC PATCH] x86_64: Rework storing segment registers


From: Sergey Bugaev
Subject: [VERY RFC PATCH] x86_64: Rework storing segment registers
Date: Thu, 15 Jun 2023 17:54:10 +0300

* For USER32, don't store fs/gs base at all
* For !USER32, store fs/gs base outside of the PCB stack
* For !USER32, don't store or touch es, ds, fs, gs (but keep ss and cs)
* For !USER32, disable all of the v86 code
---

So I went ahead and made x86_64 !USER32 stop storing or accessing the
es, ds, fs and gs segment registers. I also moved the fs/gs base out of
iss and disabled v86 (not that I know what v86 is, but it sounds like
something we don't need to support, considering we only allow running
x86_64 code).

I have only tested my own configuration (x86_64 !USER32 !MACH_KDB), so
quite likely this doesn't build or work in other configurations. For me,
though, it seems to work very well, and I haven't had a single crash in
either kernel space or user space.

Please do review!

debootstrap is still not quite happy. I've uploaded the log here: [0]

[0]: https://paste.gg/p/anonymous/c976008dc38342cd963b0778586ead19

 i386/i386/debug_i386.c |   2 -
 i386/i386/i386asm.sym  |   4 +-
 i386/i386/pcb.c        |  30 ++++++++----
 i386/i386/thread.h     |  27 +++++++++--
 x86_64/locore.S        | 105 +++++++++++++++++------------------------
 5 files changed, 90 insertions(+), 78 deletions(-)

diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c
index b5465796..41d032e3 100644
--- a/i386/i386/debug_i386.c
+++ b/i386/i386/debug_i386.c
@@ -40,8 +40,6 @@ void dump_ss(const struct i386_saved_state *st)
                st->r8, st->r9, st->r10, st->r11);
        printf("R12 %016lx R13 %016lx R14 %016lx R15 %016lx\n",
                st->r12, st->r13, st->r14, st->r15);
-       printf("FSBASE %016lx GSBASE %016lx\n",
-               st->fsbase, st->gsbase);
        printf("RIP %016lx EFLAGS %08lx\n", st->eip, st->efl);
 #else
        printf("EAX %08lx EBX %08lx ECX %08lx EDX %08lx\n",
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index fd0be557..8af0c5d6 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -84,8 +84,10 @@ size i386_kernel_state       iks
 
 size   i386_exception_link     iel
 
+#if !defined(__x86_64__) || defined(USER32)
 offset i386_saved_state        r       gs
 offset i386_saved_state        r       fs
+#endif
 offset i386_saved_state        r       cs
 offset i386_saved_state        r       uesp
 offset i386_saved_state        r       eax
@@ -108,8 +110,6 @@ offset      i386_saved_state        r       r12
 offset i386_saved_state        r       r13
 offset i386_saved_state        r       r14
 offset i386_saved_state        r       r15
-offset i386_saved_state        r       fsbase
-offset i386_saved_state        r       gsbase
 #endif
 
 offset i386_interrupt_state    i       eip
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index fb535709..d8987ddf 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -145,9 +145,14 @@ void switch_ktss(pcb_t pcb)
         *      won`t save the v86 segments, so we leave room.
         */
 
+#if !defined(__x86_64__) || defined(USER32)
        pcb_stack_top = (pcb->iss.efl & EFL_VM)
                        ? (long) (&pcb->iss + 1)
                        : (long) (&pcb->iss.v86_segs);
+#else
+       pcb_stack_top = (vm_offset_t) (&pcb->iss + 1);
+#endif
+
 #ifdef __x86_64__
        assert((pcb_stack_top & 0xF) == 0);
 #endif
@@ -224,8 +229,8 @@ void switch_ktss(pcb_t pcb)
 #endif /* MACH_PV_DESCRIPTORS */
 
 #if defined(__x86_64__) && !defined(USER32)
-       wrmsr(MSR_REG_FSBASE, pcb->iss.fsbase);
-       wrmsr(MSR_REG_GSBASE, pcb->iss.gsbase);
+       wrmsr(MSR_REG_FSBASE, pcb->isbs.fsbase);
+       wrmsr(MSR_REG_GSBASE, pcb->isbs.gsbase);
 #endif
 
        db_load_context(pcb);
@@ -412,10 +417,12 @@ void pcb_init(task_t parent_task, thread_t thread)
         */
        pcb->iss.cs = USER_CS;
        pcb->iss.ss = USER_DS;
+#if !defined(__x86_64__) || defined(USER32)
        pcb->iss.ds = USER_DS;
        pcb->iss.es = USER_DS;
        pcb->iss.fs = USER_DS;
        pcb->iss.gs = USER_DS;
+#endif
        pcb->iss.efl = EFL_USER_SET;
 
        thread->pcb = pcb;
@@ -477,6 +484,7 @@ kern_return_t thread_setstatus(
 
                state = (struct i386_thread_state *) tstate;
 
+#if !defined(__x86_64__) || defined(USER32)
                if (flavor == i386_REGS_SEGS_STATE) {
                    /*
                     * Code and stack selectors must not be null,
@@ -494,6 +502,7 @@ kern_return_t thread_setstatus(
                     || state->ss == 0 || (state->ss & SEL_PL) != SEL_PL_U)
                        return KERN_INVALID_ARGUMENT;
                }
+#endif
 
                saved_state = USER_REGS(thread);
 
@@ -532,7 +541,6 @@ kern_return_t thread_setstatus(
                saved_state->eip = state->eip;
                saved_state->efl = (state->efl & ~EFL_USER_CLEAR)
                                    | EFL_USER_SET;
-#endif /* __x86_64__ && !USER32 */
 
                /*
                 * Segment registers.  Set differently in V8086 mode.
@@ -590,6 +598,7 @@ kern_return_t thread_setstatus(
                    saved_state->fs = state->fs;
                    saved_state->gs = state->gs;
                }
+#endif /* __x86_64__ && !USER32 */
                break;
            }
 
@@ -631,6 +640,7 @@ kern_return_t thread_setstatus(
                break;
            }
 
+#if !defined(__x86_64__) || defined(USER32)
            case i386_V86_ASSIST_STATE:
            {
                struct i386_v86_assist_state *state;
@@ -657,7 +667,7 @@ kern_return_t thread_setstatus(
                        USER_REGS(thread)->efl & (EFL_TF | EFL_IF);
                break;
            }
-
+#endif
            case i386_DEBUG_STATE:
            {
                struct i386_debug_state *state;
@@ -680,8 +690,8 @@ kern_return_t thread_setstatus(
                             return KERN_INVALID_ARGUMENT;
 
                     state = (struct i386_fsgs_base_state *) tstate;
-                    thread->pcb->iss.fsbase = state->fs_base;
-                    thread->pcb->iss.gsbase = state->gs_base;
+                    thread->pcb->isbs.fsbase = state->fs_base;
+                    thread->pcb->isbs.gsbase = state->gs_base;
                     if (thread == current_thread()) {
                             wrmsr(MSR_REG_FSBASE, state->fs_base);
                             wrmsr(MSR_REG_GSBASE, state->gs_base);
@@ -766,7 +776,6 @@ kern_return_t thread_getstatus(
                state->uesp = saved_state->uesp;
                state->efl = saved_state->efl;
                state->esp = 0; /* unused */
-#endif /* __x86_64__ && !USER32 */
 
                state->cs = saved_state->cs;
                state->ss = saved_state->ss;
@@ -798,6 +807,7 @@ kern_return_t thread_getstatus(
                    state->fs = saved_state->fs & 0xffff;
                    state->gs = saved_state->gs & 0xffff;
                }
+#endif /* __x86_64__ && !USER32 */
                *count = i386_THREAD_STATE_COUNT;
                break;
            }
@@ -836,6 +846,7 @@ kern_return_t thread_getstatus(
                break;
            }
 
+#if !defined(__x86_64__) || defined(USER32)
            case i386_V86_ASSIST_STATE:
            {
                struct i386_v86_assist_state *state;
@@ -850,6 +861,7 @@ kern_return_t thread_getstatus(
                *count = i386_V86_ASSIST_STATE_COUNT;
                break;
            }
+#endif
 
            case i386_DEBUG_STATE:
            {
@@ -872,8 +884,8 @@ kern_return_t thread_getstatus(
                             return KERN_INVALID_ARGUMENT;
 
                     state = (struct i386_fsgs_base_state *) tstate;
-                    state->fs_base = thread->pcb->iss.fsbase;
-                    state->gs_base = thread->pcb->iss.gsbase;
+                    state->fs_base = thread->pcb->isbs.fsbase;
+                    state->gs_base = thread->pcb->isbs.gsbase;
                     *count = i386_FSGS_BASE_STATE_COUNT;
                     break;
             }
diff --git a/i386/i386/thread.h b/i386/i386/thread.h
index b5fc5ffb..eab762dc 100644
--- a/i386/i386/thread.h
+++ b/i386/i386/thread.h
@@ -51,14 +51,13 @@
  */
 
 struct i386_saved_state {
-#ifdef __x86_64__
-       unsigned long   fsbase;
-       unsigned long   gsbase;
-#endif
+#if !defined(__x86_64__) || defined(USER32)
        unsigned long   gs;
        unsigned long   fs;
        unsigned long   es;
        unsigned long   ds;
+#endif
+
 #ifdef __x86_64__
        unsigned long   r15;
        unsigned long   r14;
@@ -85,12 +84,15 @@ struct i386_saved_state {
        unsigned long   efl;
        unsigned long   uesp;
        unsigned long   ss;
+
+#if !defined(__x86_64__) || defined(USER32)
        struct v86_segs {
            unsigned long v86_es;       /* virtual 8086 segment registers */
            unsigned long v86_ds;
            unsigned long v86_fs;
            unsigned long v86_gs;
        } v86_segs;
+#endif
 };
 
 /*
@@ -144,6 +146,7 @@ struct i386_fpsave_state {
        };
 };
 
+#if !defined(__x86_64__) || defined(USER32)
 /*
  *     v86_assist_state:
  *
@@ -157,6 +160,7 @@ struct v86_assist_state {
        unsigned short          flags;  /* 8086 flag bits */
 };
 #define        V86_IF_PENDING          0x8000  /* unused bit */
+#endif
 
 /*
  *     i386_interrupt_state:
@@ -167,10 +171,13 @@ struct v86_assist_state {
  */
 
 struct i386_interrupt_state {
+#if !defined(__x86_64__) || defined(USER32)
        long    gs;
        long    fs;
        long    es;
        long    ds;
+#endif
+
 #ifdef __x86_64__
        long    r11;
        long    r10;
@@ -187,6 +194,13 @@ struct i386_interrupt_state {
        long    efl;
 };
 
+#if defined(__x86_64__) && !defined(USER32)
+struct i386_saved_fsgs_base_state {
+       unsigned long   fsbase;
+       unsigned long   gsbase;
+};
+#endif
+
 /*
  *     i386_machine_state:
  *
@@ -197,7 +211,9 @@ struct i386_interrupt_state {
 struct i386_machine_state {
        struct user_ldt *       ldt;
        struct i386_fpsave_state *ifps;
+#if !defined(__x86_64__) || defined(USER32)
        struct v86_assist_state v86s;
+#endif
        struct real_descriptor user_gdt[USER_GDT_SLOTS];
        struct i386_debug_state ids;
 };
@@ -209,6 +225,9 @@ typedef struct pcb {
 #endif
        struct i386_saved_state iss;
        struct i386_machine_state ims;
+#if defined(__x86_64__) && !defined(USER32)
+       struct i386_saved_fsgs_base_state isbs;
+#endif
        decl_simple_lock_data(, lock)
        unsigned short init_control;            /* Initial FPU control to set */
 #ifdef LINUX_DEV
diff --git a/x86_64/locore.S b/x86_64/locore.S
index 4d61d618..fba2ad03 100644
--- a/x86_64/locore.S
+++ b/x86_64/locore.S
@@ -42,45 +42,6 @@
 #define pusha pushq %rax ; pushq %rcx ; pushq %rdx ; pushq %rbx ; subq $8,%rsp ; pushq %rbp ; pushq %rsi ; pushq %rdi ; pushq %r8 ; pushq %r9 ; pushq %r10 ; pushq %r11 ; pushq %r12 ; pushq %r13 ; pushq %r14 ; pushq %r15
 #define popa popq %r15 ; popq %r14 ; popq %r13 ; popq %r12 ; popq %r11 ; popq %r10 ; popq %r9 ; popq %r8 ; popq %rdi ; popq %rsi ; popq %rbp ; addq $8,%rsp ; popq %rbx ; popq %rdx ; popq %rcx ; popq %rax
 
-#ifdef USER32
-#define PUSH_FSGS              \
-       pushq   %fs             ;\
-       pushq   %gs             ;\
-       subq    $16,%rsp
-#else
-#define PUSH_FSGS              \
-       subq    $32,%rsp
-#endif
-
-#ifdef USER32
-#define POP_FSGS               \
-       popq    %gs             ;\
-       popq    %fs             ;\
-       addq    $16,%rsp
-#else
-#define POP_FSGS               \
-       addq    $32,%rsp
-#endif
-
-#ifdef USER32
-#define PUSH_FSGS_ISR          \
-       pushq   %fs             ;\
-       pushq   %gs
-#else
-#define PUSH_FSGS_ISR          \
-       subq    $16,%rsp
-#endif
-
-#ifdef USER32
-#define POP_FSGS_ISR           \
-       popq    %gs             ;\
-       popq    %fs
-#else
-#define POP_FSGS_ISR           \
-       addq    $16,%rsp
-#endif
-
-
 
 /*
  * Fault recovery.
@@ -368,14 +329,17 @@ ENTRY(t_segnp)
                                        /* indicate fault type */
 
 trap_check_kernel_exit:
+#ifdef USER32
        testq   $(EFL_VM),32(%rsp)      /* is trap from V86 mode? */
        jnz     EXT(alltraps)           /* isn`t kernel trap if so */
+#endif
        /* Note: handling KERNEL_RING value by hand */
        testq   $2,24(%rsp)             /* is trap from kernel mode? */
        jnz     EXT(alltraps)           /* if so:  */
                                        /* check for the kernel exit sequence */
        cmpq    $_kret_iret,16(%rsp)    /* on IRET? */
        je      fault_iret
+#ifdef USER32
 #if 0
        cmpq    $_kret_popl_ds,16(%rsp) /* popping DS? */
        je      fault_popl_ds
@@ -386,6 +350,7 @@ trap_check_kernel_exit:
        je      fault_popl_fs
        cmpq    $_kret_popl_gs,16(%rsp) /* popping GS? */
        je      fault_popl_gs
+#endif
 take_fault:                            /* if none of the above: */
        jmp     EXT(alltraps)           /* treat as normal trap. */
 
@@ -414,6 +379,7 @@ fault_iret:
        popq    %rax                    /* restore eax */
        jmp     EXT(alltraps)           /* take fault */
 
+#ifdef USER32
 /*
  * Fault restoring a segment register.  The user's registers are still
  * saved on the stack.  The offending segment register has not been
@@ -446,13 +412,11 @@ push_fs:
        pushq   %fs                     /* restore fs, */
 push_gs:
        pushq   %gs                     /* restore gs. */
-push_gsbase:
-       pushq   $0
-       pushq   $0
 push_segregs:
        movq    %rax,R_TRAPNO(%rsp)     /* set trap number */
        movq    %rdx,R_ERR(%rsp)        /* set error code */
        jmp     trap_set_segs           /* take trap */
+#endif
 
 /*
  * Debug trap.  Check for single-stepping across system call into
@@ -462,8 +426,10 @@ push_segregs:
  */
 ENTRY(t_debug)
        INT_FIX
+#ifdef USER32
        testq   $(EFL_VM),16(%rsp)      /* is trap from V86 mode? */
        jnz     0f                      /* isn`t kernel trap if so */
+#endif
        /* Note: handling KERNEL_RING value by hand */
        testq   $2,8(%rsp)              /* is trap from kernel mode? */
        jnz     0f                      /* if so: */
@@ -510,11 +476,13 @@ ENTRY(t_page_fault)
 ENTRY(alltraps)
        pusha                           /* save the general registers */
 trap_push_segs:
+#ifdef USER32
        movq    %ds,%rax                        /* and the segment registers */
        pushq   %rax
        movq    %es,%rax                        /* and the segment registers */
        pushq   %rax
-       PUSH_FSGS
+       pushq   %fs
+       pushq   %gs
 
        /* Note that we have to load the segment registers
           even if this is a trap from the kernel,
@@ -523,14 +491,15 @@ trap_push_segs:
        mov     %ss,%ax                 /* switch to kernel data segment */
        mov     %ax,%ds                 /* (same as kernel stack segment) */
        mov     %ax,%es
-#ifdef USER32
        mov     %ax,%fs
        mov     %ax,%gs
 #endif
 trap_set_segs:
        cld                             /* clear direction flag */
+#ifdef USER32
        testl   $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */
        jnz     trap_from_user          /* user mode trap if so */
+#endif
        /* Note: handling KERNEL_RING value by hand */
        testb   $2,R_CS(%rsp)           /* user mode trap? */
        jz      trap_from_kernel        /* kernel trap if not */
@@ -580,23 +549,18 @@ _return_to_user:
  */
 
 _return_from_kernel:
-       addq    $16,%rsp                /* skip FS/GS base */
-#ifndef USER32
-_kret_popl_gs:
-_kret_popl_fs:
-       addq    $16,%rsp                /* skip FS/GS selector */
-#else
+#ifdef USER32
 _kret_popl_gs:
        popq    %gs                     /* restore segment registers */
 _kret_popl_fs:
        popq    %fs
-#endif
 _kret_popl_es:
        popq    %rax
        movq    %rax,%es
 _kret_popl_ds:
        popq    %rax
        movq    %rax,%ds
+#endif
        popa                            /* restore general registers */
        addq    $16,%rsp                /* discard trap number and error code */
 _kret_iret:
@@ -742,16 +706,17 @@ ENTRY(all_intrs)
        cmpq    %ss:EXT(int_stack_base),%rdx
        je      int_from_intstack       /* if not: */
 
+#ifdef USER32
        movq    %ds,%rdx                        /* save segment registers */
        pushq   %rdx
        movq    %es,%rdx
        pushq   %rdx
-       PUSH_FSGS_ISR
+       pushq   %fs
+       pushq   %gs
 
        mov     %ss,%dx                 /* switch to kernel segments */
        mov     %dx,%ds
        mov     %dx,%es
-#ifdef USER32
        mov     %dx,%fs
        mov     %dx,%gs
 #endif
@@ -784,8 +749,10 @@ LEXT(return_to_iret)                       /* ( label for kdb_kintr and hardclock) */
 
        popq    %rsp                    /* switch back to old stack */
 
+#ifdef USER32
        testl   $(EFL_VM),I_EFL(%rsp)   /* if in V86 */
        jnz     0f                      /* or */
+#endif
        /* Note: handling KERNEL_RING value by hand */
        testb   $2,I_CS(%rsp)           /* user mode, */
        jz      1f                      /* check for ASTs */
@@ -793,11 +760,14 @@ LEXT(return_to_iret)                      /* ( label for kdb_kintr and hardclock) */
        cmpq    $0,CX(EXT(need_ast),%edx)
        jnz     ast_from_interrupt      /* take it if so */
 1:
-       POP_FSGS_ISR
+#ifdef USER32
+       popq    %gs
+       popq    %fs
        pop     %rdx
        mov     %rdx,%es
        pop     %rdx
        mov     %rdx,%ds
+#endif
        pop     %r11
        pop     %r10
        pop     %r9
@@ -847,11 +817,14 @@ stack_overflowed:
  *     ss
  */
 ast_from_interrupt:
-       POP_FSGS_ISR
+#ifdef USER32
+       popq    %gs
+       popq    %fs
        pop     %rdx
        mov     %rdx,%es
        pop     %rdx
        mov     %rdx,%ds
+#endif
        popq    %r11
        popq    %r10
        popq    %r9
@@ -864,16 +837,18 @@ ast_from_interrupt:
        pushq   $0                      /* zero code */
        pushq   $0                      /* zero trap number */
        pusha                           /* save general registers */
+
+#ifdef USER32
        mov     %ds,%rdx                /* save segment registers */
        push    %rdx
        mov     %es,%rdx
        push    %rdx
-       PUSH_FSGS_ISR
+       pushq   %fs
+       pushq   %gs
 
        mov     %ss,%dx                 /* switch to kernel segments */
        mov     %dx,%ds
        mov     %dx,%es
-#ifdef USER32
        mov     %dx,%fs
        mov     %dx,%gs
 #endif
@@ -994,20 +969,26 @@ kdb_from_iret_i:                  /* on interrupt stack */
        pushq   $0                      /* zero error code */
        pushq   $0                      /* zero trap number */
        pusha                           /* save general registers */
+#ifdef USER32
        mov     %ds,%rdx                /* save segment registers */
        push    %rdx
        mov     %es,%rdx
        push    %rdx
-       PUSH_FSGS
+       pushq   %fs
+       pushq   %gs
+#endif
        movq    %rsp,%rdx               /* pass regs, */
        movq    $0,%rsi                 /* code, */
        movq    $-1,%rdi                /* type to kdb */
        call    EXT(kdb_trap)
-       POP_FSGS
+#ifdef USER32
+       popq    %gs
+       popq    %fs
        pop     %rdx
        mov     %rdx,%es
        pop     %rdx
        mov     %rdx,%ds
+#endif
        popa                            /* restore general registers */
        addq    $16,%rsp
 
@@ -1082,23 +1063,27 @@ ttd_from_iret_i:                        /* on interrupt stack */
        pushq   $0                      /* zero error code */
        pushq   $0                      /* zero trap number */
        pusha                           /* save general registers */
+#ifdef USER32
        mov     %ds,%rdx                /* save segment registers */
        push    %rdx
        mov     %es,%rdx
        push    %rdx
        push    %fs
        push    %gs
+#endif
        ud2     // TEST it
        movq    %rsp,%rdx               /* pass regs, */
        movq    $0,%rsi                 /* code, */
        movq    $-1,%rdi                /* type to kdb */
        call    _kttd_trap
+#ifdef USER32
        pop     %gs                     /* restore segment registers */
        pop     %fs
        pop     %rdx
        mov     %rdx,%es
        pop     %rdx
        mov     %rdx,%ds
+#endif
        popa                            /* restore general registers */
        addq    $16,%rsp
 
@@ -1137,8 +1122,6 @@ syscall_entry_2:
        pushq   %rdx
        pushq   %fs
        pushq   %gs
-       pushq   $0      // gsbase
-       pushq   $0      // fsbase
 
        mov     %ss,%dx                 /* switch to kernel data segment */
        mov     %dx,%ds
-- 
2.40.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]