[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[VERY RFC PATCH] x86_64: Rework storing segment registers
From: Sergey Bugaev
Subject: [VERY RFC PATCH] x86_64: Rework storing segment registers
Date: Thu, 15 Jun 2023 17:54:10 +0300
* For USER32, don't store fs/gs base at all
* For !USER32, store fs/gs base outside of the PCB stack
* For !USER32, don't store or touch es, ds, fs, gs (but keep ss and cs)
* For !USER32, disable all of the v86 code
---
So I went ahead and made x86_64 !USER32 not store or access the es, ds,
fs and gs segment registers, along with moving the fs/gs base out of iss
and disabling v86 (I don't really know what v86 is -- presumably virtual
8086 mode -- but it sounds like something we don't need to support,
considering we only allow running x86_64 code).
I have only tested my own configuration (x86_64 !USER32 !MACH_KDB); quite
likely this doesn't build or work in other configurations. For me, though,
it seems to work very well, and I haven't had a single crash in either
kernel space or user space.
Please do review!
debootstrap is still not quite happy. I've uploaded the log here: [0]
[0]: https://paste.gg/p/anonymous/c976008dc38342cd963b0778586ead19
i386/i386/debug_i386.c | 2 -
i386/i386/i386asm.sym | 4 +-
i386/i386/pcb.c | 30 ++++++++----
i386/i386/thread.h | 27 +++++++++--
x86_64/locore.S | 105 +++++++++++++++++------------------------
5 files changed, 90 insertions(+), 78 deletions(-)
diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c
index b5465796..41d032e3 100644
--- a/i386/i386/debug_i386.c
+++ b/i386/i386/debug_i386.c
@@ -40,8 +40,6 @@ void dump_ss(const struct i386_saved_state *st)
st->r8, st->r9, st->r10, st->r11);
printf("R12 %016lx R13 %016lx R14 %016lx R15 %016lx\n",
st->r12, st->r13, st->r14, st->r15);
- printf("FSBASE %016lx GSBASE %016lx\n",
- st->fsbase, st->gsbase);
printf("RIP %016lx EFLAGS %08lx\n", st->eip, st->efl);
#else
printf("EAX %08lx EBX %08lx ECX %08lx EDX %08lx\n",
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index fd0be557..8af0c5d6 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -84,8 +84,10 @@ size i386_kernel_state iks
size i386_exception_link iel
+#if !defined(__x86_64__) || defined(USER32)
offset i386_saved_state r gs
offset i386_saved_state r fs
+#endif
offset i386_saved_state r cs
offset i386_saved_state r uesp
offset i386_saved_state r eax
@@ -108,8 +110,6 @@ offset i386_saved_state r r12
offset i386_saved_state r r13
offset i386_saved_state r r14
offset i386_saved_state r r15
-offset i386_saved_state r fsbase
-offset i386_saved_state r gsbase
#endif
offset i386_interrupt_state i eip
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index fb535709..d8987ddf 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -145,9 +145,14 @@ void switch_ktss(pcb_t pcb)
* won`t save the v86 segments, so we leave room.
*/
+#if !defined(__x86_64__) || defined(USER32)
pcb_stack_top = (pcb->iss.efl & EFL_VM)
? (long) (&pcb->iss + 1)
: (long) (&pcb->iss.v86_segs);
+#else
+ pcb_stack_top = (vm_offset_t) (&pcb->iss + 1);
+#endif
+
#ifdef __x86_64__
assert((pcb_stack_top & 0xF) == 0);
#endif
@@ -224,8 +229,8 @@ void switch_ktss(pcb_t pcb)
#endif /* MACH_PV_DESCRIPTORS */
#if defined(__x86_64__) && !defined(USER32)
- wrmsr(MSR_REG_FSBASE, pcb->iss.fsbase);
- wrmsr(MSR_REG_GSBASE, pcb->iss.gsbase);
+ wrmsr(MSR_REG_FSBASE, pcb->isbs.fsbase);
+ wrmsr(MSR_REG_GSBASE, pcb->isbs.gsbase);
#endif
db_load_context(pcb);
@@ -412,10 +417,12 @@ void pcb_init(task_t parent_task, thread_t thread)
*/
pcb->iss.cs = USER_CS;
pcb->iss.ss = USER_DS;
+#if !defined(__x86_64__) || defined(USER32)
pcb->iss.ds = USER_DS;
pcb->iss.es = USER_DS;
pcb->iss.fs = USER_DS;
pcb->iss.gs = USER_DS;
+#endif
pcb->iss.efl = EFL_USER_SET;
thread->pcb = pcb;
@@ -477,6 +484,7 @@ kern_return_t thread_setstatus(
state = (struct i386_thread_state *) tstate;
+#if !defined(__x86_64__) || defined(USER32)
if (flavor == i386_REGS_SEGS_STATE) {
/*
* Code and stack selectors must not be null,
@@ -494,6 +502,7 @@ kern_return_t thread_setstatus(
|| state->ss == 0 || (state->ss & SEL_PL) != SEL_PL_U)
return KERN_INVALID_ARGUMENT;
}
+#endif
saved_state = USER_REGS(thread);
@@ -532,7 +541,6 @@ kern_return_t thread_setstatus(
saved_state->eip = state->eip;
saved_state->efl = (state->efl & ~EFL_USER_CLEAR)
| EFL_USER_SET;
-#endif /* __x86_64__ && !USER32 */
/*
* Segment registers. Set differently in V8086 mode.
@@ -590,6 +598,7 @@ kern_return_t thread_setstatus(
saved_state->fs = state->fs;
saved_state->gs = state->gs;
}
+#endif /* __x86_64__ && !USER32 */
break;
}
@@ -631,6 +640,7 @@ kern_return_t thread_setstatus(
break;
}
+#if !defined(__x86_64__) || defined(USER32)
case i386_V86_ASSIST_STATE:
{
struct i386_v86_assist_state *state;
@@ -657,7 +667,7 @@ kern_return_t thread_setstatus(
USER_REGS(thread)->efl & (EFL_TF | EFL_IF);
break;
}
-
+#endif
case i386_DEBUG_STATE:
{
struct i386_debug_state *state;
@@ -680,8 +690,8 @@ kern_return_t thread_setstatus(
return KERN_INVALID_ARGUMENT;
state = (struct i386_fsgs_base_state *) tstate;
- thread->pcb->iss.fsbase = state->fs_base;
- thread->pcb->iss.gsbase = state->gs_base;
+ thread->pcb->isbs.fsbase = state->fs_base;
+ thread->pcb->isbs.gsbase = state->gs_base;
if (thread == current_thread()) {
wrmsr(MSR_REG_FSBASE, state->fs_base);
wrmsr(MSR_REG_GSBASE, state->gs_base);
@@ -766,7 +776,6 @@ kern_return_t thread_getstatus(
state->uesp = saved_state->uesp;
state->efl = saved_state->efl;
state->esp = 0; /* unused */
-#endif /* __x86_64__ && !USER32 */
state->cs = saved_state->cs;
state->ss = saved_state->ss;
@@ -798,6 +807,7 @@ kern_return_t thread_getstatus(
state->fs = saved_state->fs & 0xffff;
state->gs = saved_state->gs & 0xffff;
}
+#endif /* __x86_64__ && !USER32 */
*count = i386_THREAD_STATE_COUNT;
break;
}
@@ -836,6 +846,7 @@ kern_return_t thread_getstatus(
break;
}
+#if !defined(__x86_64__) || defined(USER32)
case i386_V86_ASSIST_STATE:
{
struct i386_v86_assist_state *state;
@@ -850,6 +861,7 @@ kern_return_t thread_getstatus(
*count = i386_V86_ASSIST_STATE_COUNT;
break;
}
+#endif
case i386_DEBUG_STATE:
{
@@ -872,8 +884,8 @@ kern_return_t thread_getstatus(
return KERN_INVALID_ARGUMENT;
state = (struct i386_fsgs_base_state *) tstate;
- state->fs_base = thread->pcb->iss.fsbase;
- state->gs_base = thread->pcb->iss.gsbase;
+ state->fs_base = thread->pcb->isbs.fsbase;
+ state->gs_base = thread->pcb->isbs.gsbase;
*count = i386_FSGS_BASE_STATE_COUNT;
break;
}
diff --git a/i386/i386/thread.h b/i386/i386/thread.h
index b5fc5ffb..eab762dc 100644
--- a/i386/i386/thread.h
+++ b/i386/i386/thread.h
@@ -51,14 +51,13 @@
*/
struct i386_saved_state {
-#ifdef __x86_64__
- unsigned long fsbase;
- unsigned long gsbase;
-#endif
+#if !defined(__x86_64__) || defined(USER32)
unsigned long gs;
unsigned long fs;
unsigned long es;
unsigned long ds;
+#endif
+
#ifdef __x86_64__
unsigned long r15;
unsigned long r14;
@@ -85,12 +84,15 @@ struct i386_saved_state {
unsigned long efl;
unsigned long uesp;
unsigned long ss;
+
+#if !defined(__x86_64__) || defined(USER32)
struct v86_segs {
unsigned long v86_es; /* virtual 8086 segment registers */
unsigned long v86_ds;
unsigned long v86_fs;
unsigned long v86_gs;
} v86_segs;
+#endif
};
/*
@@ -144,6 +146,7 @@ struct i386_fpsave_state {
};
};
+#if !defined(__x86_64__) || defined(USER32)
/*
* v86_assist_state:
*
@@ -157,6 +160,7 @@ struct v86_assist_state {
unsigned short flags; /* 8086 flag bits */
};
#define V86_IF_PENDING 0x8000 /* unused bit */
+#endif
/*
* i386_interrupt_state:
@@ -167,10 +171,13 @@ struct v86_assist_state {
*/
struct i386_interrupt_state {
+#if !defined(__x86_64__) || defined(USER32)
long gs;
long fs;
long es;
long ds;
+#endif
+
#ifdef __x86_64__
long r11;
long r10;
@@ -187,6 +194,13 @@ struct i386_interrupt_state {
long efl;
};
+#if defined(__x86_64__) && !defined(USER32)
+struct i386_saved_fsgs_base_state {
+ unsigned long fsbase;
+ unsigned long gsbase;
+};
+#endif
+
/*
* i386_machine_state:
*
@@ -197,7 +211,9 @@ struct i386_interrupt_state {
struct i386_machine_state {
struct user_ldt * ldt;
struct i386_fpsave_state *ifps;
+#if !defined(__x86_64__) || defined(USER32)
struct v86_assist_state v86s;
+#endif
struct real_descriptor user_gdt[USER_GDT_SLOTS];
struct i386_debug_state ids;
};
@@ -209,6 +225,9 @@ typedef struct pcb {
#endif
struct i386_saved_state iss;
struct i386_machine_state ims;
+#if defined(__x86_64__) && !defined(USER32)
+ struct i386_saved_fsgs_base_state isbs;
+#endif
decl_simple_lock_data(, lock)
unsigned short init_control; /* Initial FPU control to set */
#ifdef LINUX_DEV
diff --git a/x86_64/locore.S b/x86_64/locore.S
index 4d61d618..fba2ad03 100644
--- a/x86_64/locore.S
+++ b/x86_64/locore.S
@@ -42,45 +42,6 @@
#define pusha pushq %rax ; pushq %rcx ; pushq %rdx ; pushq %rbx ; subq $8,%rsp
; pushq %rbp ; pushq %rsi ; pushq %rdi ; pushq %r8 ; pushq %r9 ; pushq %r10 ;
pushq %r11 ; pushq %r12 ; pushq %r13 ; pushq %r14 ; pushq %r15
#define popa popq %r15 ; popq %r14 ; popq %r13 ; popq %r12 ; popq %r11 ; popq
%r10 ; popq %r9 ; popq %r8 ; popq %rdi ; popq %rsi ; popq %rbp ; addq $8,%rsp ;
popq %rbx ; popq %rdx ; popq %rcx ; popq %rax
-#ifdef USER32
-#define PUSH_FSGS \
- pushq %fs ;\
- pushq %gs ;\
- subq $16,%rsp
-#else
-#define PUSH_FSGS \
- subq $32,%rsp
-#endif
-
-#ifdef USER32
-#define POP_FSGS \
- popq %gs ;\
- popq %fs ;\
- addq $16,%rsp
-#else
-#define POP_FSGS \
- addq $32,%rsp
-#endif
-
-#ifdef USER32
-#define PUSH_FSGS_ISR \
- pushq %fs ;\
- pushq %gs
-#else
-#define PUSH_FSGS_ISR \
- subq $16,%rsp
-#endif
-
-#ifdef USER32
-#define POP_FSGS_ISR \
- popq %gs ;\
- popq %fs
-#else
-#define POP_FSGS_ISR \
- addq $16,%rsp
-#endif
-
-
/*
* Fault recovery.
@@ -368,14 +329,17 @@ ENTRY(t_segnp)
/* indicate fault type */
trap_check_kernel_exit:
+#ifdef USER32
testq $(EFL_VM),32(%rsp) /* is trap from V86 mode? */
jnz EXT(alltraps) /* isn`t kernel trap if so */
+#endif
/* Note: handling KERNEL_RING value by hand */
testq $2,24(%rsp) /* is trap from kernel mode? */
jnz EXT(alltraps) /* if so: */
/* check for the kernel exit sequence */
cmpq $_kret_iret,16(%rsp) /* on IRET? */
je fault_iret
+#ifdef USER32
#if 0
cmpq $_kret_popl_ds,16(%rsp) /* popping DS? */
je fault_popl_ds
@@ -386,6 +350,7 @@ trap_check_kernel_exit:
je fault_popl_fs
cmpq $_kret_popl_gs,16(%rsp) /* popping GS? */
je fault_popl_gs
+#endif
take_fault: /* if none of the above: */
jmp EXT(alltraps) /* treat as normal trap. */
@@ -414,6 +379,7 @@ fault_iret:
popq %rax /* restore eax */
jmp EXT(alltraps) /* take fault */
+#ifdef USER32
/*
* Fault restoring a segment register. The user's registers are still
* saved on the stack. The offending segment register has not been
@@ -446,13 +412,11 @@ push_fs:
pushq %fs /* restore fs, */
push_gs:
pushq %gs /* restore gs. */
-push_gsbase:
- pushq $0
- pushq $0
push_segregs:
movq %rax,R_TRAPNO(%rsp) /* set trap number */
movq %rdx,R_ERR(%rsp) /* set error code */
jmp trap_set_segs /* take trap */
+#endif
/*
* Debug trap. Check for single-stepping across system call into
@@ -462,8 +426,10 @@ push_segregs:
*/
ENTRY(t_debug)
INT_FIX
+#ifdef USER32
testq $(EFL_VM),16(%rsp) /* is trap from V86 mode? */
jnz 0f /* isn`t kernel trap if so */
+#endif
/* Note: handling KERNEL_RING value by hand */
testq $2,8(%rsp) /* is trap from kernel mode? */
jnz 0f /* if so: */
@@ -510,11 +476,13 @@ ENTRY(t_page_fault)
ENTRY(alltraps)
pusha /* save the general registers */
trap_push_segs:
+#ifdef USER32
movq %ds,%rax /* and the segment registers */
pushq %rax
movq %es,%rax /* and the segment registers */
pushq %rax
- PUSH_FSGS
+ pushq %fs
+ pushq %gs
/* Note that we have to load the segment registers
even if this is a trap from the kernel,
@@ -523,14 +491,15 @@ trap_push_segs:
mov %ss,%ax /* switch to kernel data segment */
mov %ax,%ds /* (same as kernel stack segment) */
mov %ax,%es
-#ifdef USER32
mov %ax,%fs
mov %ax,%gs
#endif
trap_set_segs:
cld /* clear direction flag */
+#ifdef USER32
testl $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */
jnz trap_from_user /* user mode trap if so */
+#endif
/* Note: handling KERNEL_RING value by hand */
testb $2,R_CS(%rsp) /* user mode trap? */
jz trap_from_kernel /* kernel trap if not */
@@ -580,23 +549,18 @@ _return_to_user:
*/
_return_from_kernel:
- addq $16,%rsp /* skip FS/GS base */
-#ifndef USER32
-_kret_popl_gs:
-_kret_popl_fs:
- addq $16,%rsp /* skip FS/GS selector */
-#else
+#ifdef USER32
_kret_popl_gs:
popq %gs /* restore segment registers */
_kret_popl_fs:
popq %fs
-#endif
_kret_popl_es:
popq %rax
movq %rax,%es
_kret_popl_ds:
popq %rax
movq %rax,%ds
+#endif
popa /* restore general registers */
addq $16,%rsp /* discard trap number and error code */
_kret_iret:
@@ -742,16 +706,17 @@ ENTRY(all_intrs)
cmpq %ss:EXT(int_stack_base),%rdx
je int_from_intstack /* if not: */
+#ifdef USER32
movq %ds,%rdx /* save segment registers */
pushq %rdx
movq %es,%rdx
pushq %rdx
- PUSH_FSGS_ISR
+ pushq %fs
+ pushq %gs
mov %ss,%dx /* switch to kernel segments */
mov %dx,%ds
mov %dx,%es
-#ifdef USER32
mov %dx,%fs
mov %dx,%gs
#endif
@@ -784,8 +749,10 @@ LEXT(return_to_iret) /* ( label for
kdb_kintr and hardclock) */
popq %rsp /* switch back to old stack */
+#ifdef USER32
testl $(EFL_VM),I_EFL(%rsp) /* if in V86 */
jnz 0f /* or */
+#endif
/* Note: handling KERNEL_RING value by hand */
testb $2,I_CS(%rsp) /* user mode, */
jz 1f /* check for ASTs */
@@ -793,11 +760,14 @@ LEXT(return_to_iret) /* ( label for
kdb_kintr and hardclock) */
cmpq $0,CX(EXT(need_ast),%edx)
jnz ast_from_interrupt /* take it if so */
1:
- POP_FSGS_ISR
+#ifdef USER32
+ popq %gs
+ popq %fs
pop %rdx
mov %rdx,%es
pop %rdx
mov %rdx,%ds
+#endif
pop %r11
pop %r10
pop %r9
@@ -847,11 +817,14 @@ stack_overflowed:
* ss
*/
ast_from_interrupt:
- POP_FSGS_ISR
+#ifdef USER32
+ popq %gs
+ popq %fs
pop %rdx
mov %rdx,%es
pop %rdx
mov %rdx,%ds
+#endif
popq %r11
popq %r10
popq %r9
@@ -864,16 +837,18 @@ ast_from_interrupt:
pushq $0 /* zero code */
pushq $0 /* zero trap number */
pusha /* save general registers */
+
+#ifdef USER32
mov %ds,%rdx /* save segment registers */
push %rdx
mov %es,%rdx
push %rdx
- PUSH_FSGS_ISR
+ pushq %fs
+ pushq %gs
mov %ss,%dx /* switch to kernel segments */
mov %dx,%ds
mov %dx,%es
-#ifdef USER32
mov %dx,%fs
mov %dx,%gs
#endif
@@ -994,20 +969,26 @@ kdb_from_iret_i: /* on interrupt stack */
pushq $0 /* zero error code */
pushq $0 /* zero trap number */
pusha /* save general registers */
+#ifdef USER32
mov %ds,%rdx /* save segment registers */
push %rdx
mov %es,%rdx
push %rdx
- PUSH_FSGS
+ pushq %fs
+ pushq %gs
+#endif
movq %rsp,%rdx /* pass regs, */
movq $0,%rsi /* code, */
movq $-1,%rdi /* type to kdb */
call EXT(kdb_trap)
- POP_FSGS
+#ifdef USER32
+ popq %gs
+ popq %fs
pop %rdx
mov %rdx,%es
pop %rdx
mov %rdx,%ds
+#endif
popa /* restore general registers */
addq $16,%rsp
@@ -1082,23 +1063,27 @@ ttd_from_iret_i: /* on interrupt
stack */
pushq $0 /* zero error code */
pushq $0 /* zero trap number */
pusha /* save general registers */
+#ifdef USER32
mov %ds,%rdx /* save segment registers */
push %rdx
mov %es,%rdx
push %rdx
push %fs
push %gs
+#endif
ud2 // TEST it
movq %rsp,%rdx /* pass regs, */
movq $0,%rsi /* code, */
movq $-1,%rdi /* type to kdb */
call _kttd_trap
+#ifdef USER32
pop %gs /* restore segment registers */
pop %fs
pop %rdx
mov %rdx,%es
pop %rdx
mov %rdx,%ds
+#endif
popa /* restore general registers */
addq $16,%rsp
@@ -1137,8 +1122,6 @@ syscall_entry_2:
pushq %rdx
pushq %fs
pushq %gs
- pushq $0 // gsbase
- pushq $0 // fsbase
mov %ss,%dx /* switch to kernel data segment */
mov %dx,%ds
--
2.40.1
- [VERY RFC PATCH] x86_64: Rework storing segment registers,
Sergey Bugaev <=