|
From: | Luca Dariz |
Subject: | Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point |
Date: | Tue, 28 Feb 2023 14:25:59 +0100 |
Il 27/02/23 23:02, Samuel Thibault ha scritto:
Luca Dariz, le lun. 27 févr. 2023 21:45:01 +0100, a ecrit:diff --git a/i386/i386/ldt.h b/i386/i386/ldt.h index b15f11a5..4490f99f 100644 --- a/i386/i386/ldt.h +++ b/i386/i386/ldt.h @@ -45,9 +45,14 @@ #define USER_SCALL 0x07 /* system call gate */ #ifdef __x86_64__ /* Call gate needs two entries */ -#endif + +/* The sysret instruction puts some constraints on the user segment indexes */ +#define USER_CS 0x1f /* user code segment */ +#define USER_DS 0x17 /* user data segment */I'd say we'd rather avoid changing them for the x86_64 && USER32 case?
Right, I forgot to add the `! USER32` check to the condition here.
+#else #define USER_CS 0x17 /* user code segment */ #define USER_DS 0x1f /* user data segment */ +#endif#define LDTSZ 4 diff --git a/i386/include/mach/i386/syscall_sw.h b/i386/include/mach/i386/syscall_sw.hindex 86f6ff2f..20ef7c13 100644 --- a/i386/include/mach/i386/syscall_sw.h +++ b/i386/include/mach/i386/syscall_sw.h @@ -29,16 +29,16 @@#include <mach/machine/asm.h> -#if BSD_TRAP-#define kernel_trap(trap_name,trap_number,number_args) \ -ENTRY(trap_name) \ - movl $ trap_number,%eax; \ - SVC; \ - jb LCL(cerror); \ - ret; \ +#if defined(__x86_64__) && ! defined(USER32) +#define kernel_trap(trap_name,trap_number,number_args) \ +ENTRY(trap_name) \ + movq $ trap_number,%rax; \+ movq %rcx,%r10; \What is that for?
The syscall instruction automatically stores the return RIP in RCX, but RCX is also where the C calling convention passes the 4th function argument, so the wrapper has to move that argument to another register before executing syscall. R10 is the only non-callee-preserved register remaining for this purpose. In the syscall64 code below, this value is moved back to RCX after the thread state has been saved.
+ syscall; \ + ret; \ END(trap_name) #else -#define kernel_trap(trap_name,trap_number,number_args) \ +#define kernel_trap(trap_name,trap_number,number_args) \ ENTRY(trap_name) \ movl $ trap_number,%eax; \ SVC; \ diff --git a/x86_64/locore.S b/x86_64/locore.S index 47d9085c..fdf7300b 100644 --- a/x86_64/locore.S +++ b/x86_64/locore.S @@ -1281,6 +1281,142 @@ DATA(cpu_features_ecx)END(syscall) ++/* Entry point for 64-bit syscalls. + * On entry we're still on the user stack, so better not use it. Instead we + * save the thread state immediately in thread->pcb->iss, then try to invoke + * the syscall. + * TODO: + - for now we assume the return address is canonical, but apparently there + can be cases where it's not (see how Linux handles this). Does it apply + here? + - do we need to check for ast on syscalls? Maybe on interrupts is enough + - check that the case where a task is suspended, and later returns via + iretq from return_from_trap, works fine in all combinations + - emulated syscalls - are they used anywhere?Not that I know of.
Ok, I'll update the comment about emulated syscalls.
+ */ +ENTRY(syscall64) + /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and + * eflags in RAX to allow using r11 as temporary register */ + shlq $32,%r11 + shlq $32,%rax /* make sure bits 32:63 of %rax are zero */ + shrq $32,%rax + or %r11,%rax + + /* Save thread state in pcb->iss, as on exception entry. + * Since this is triggered synchronously from userspace, we can + * save only the callee-preserved status according to the C ABI, + * plus RIP and EFLAGS for sysret */ + CPU_NUMBER(%r11) + movq CX(EXT(active_threads),%r11),%r11 /* point to current thread */ + movq TH_PCB(%r11),%r11 /* point to pcb */ + addq $ PCB_ISS,%r11 /* point to saved state */ + + mov %gs,R_GS(%r11) + mov %fs,R_FS(%r11) + mov %rsp,R_UESP(%r11) /* callee-preserved register */ + mov %rcx,R_EIP(%r11) /* syscall places user RIP in RCX */ + mov %rbx,R_EBX(%r11) /* callee-preserved register */ + mov %rax,%rbx /* Now we can unpack eflags again */ + shr $32,%rbx + mov %rbx,R_EFLAGS(%r11) /* ... and save them in pcb as well */ + mov %rbp,R_EBP(%r11) /* callee-preserved register */ + mov %r12,R_R12(%r11) /* callee-preserved register */ + mov %r13,R_R13(%r11) /* callee-preserved register */ + mov %r14,R_R14(%r11) /* callee-preserved register */ + mov %r15,R_R15(%r11) /* callee-preserved register */ + mov %r11,%rbx /* prepare for error handling */ + mov %r10,%rcx /* fix arg3 location according to C ABI */ + + /* switch to kernel stack */ + CPU_NUMBER(%r11) + movq CX(EXT(kernel_stack),%r11),%rsp + + /* Now we have saved state and args 1-6 are in place. + * Before invoking the syscall we do some bound checking and, + * if we have more that 6 arguments, we need to copy the + * remaining ones to the kernel stack, handling page faults when + * accessing the user stack. 
+ */ + shlq $32,%rax /* make sure bits 32:63 of %rax are zero */ + shrq $32,%rax + negl %eax /* get system call number */ + jl _syscall64_range /* out of range if it was positive */ + cmpl EXT(mach_trap_count),%eax /* check system call table bounds */ + jg _syscall64_range /* error if out of range */ + shll $5,%eax /* manual indexing of mach_trap_t */ + + /* check if we need to place some arguments on the stack */ +_syscall64_args_stack: + mov EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */ + subq $6,%r10 /* the first 6 args are already in place */ + jl _syscall64_call /* skip argument copy if >6 args */jle?
Right, it should be jle; I didn't test a syscall with exactly 6 arguments.
+ + movq R_UESP(%rbx),%r11 /* get user stack pointer */ + addq $8,%r11 /* Skip user return address */ + + mov $USER_DS,%r12 /* use user data segment for accesses */ + mov %r12,%fs + + lea (%r11,%r10,8),%r11 /* point past last argument */+ xorq %r12,%r12Why clearing it?
Actually, there is no need to clear it: %r12 is overwritten by the load in the argument-copy loop just below.
+0: subq $8,%r11 + RECOVER(_syscall64_addr_push) + mov %fs:(%r11),%r12 + pushq %r12 /* push argument on stack */ + dec %r10 + jnz 0b /* loop for all remaining arguments */ + +_syscall64_call: + call *EXT(mach_trap_table)+8(%rax) /* call procedure */ + // XXX: check ast on exit? + + /* avoid leaking information in callee-clobbered registers */ + mov $0,%rdiRather xorq?
Will do. Thanks! Luca
[Prev in Thread] | Current Thread | [Next in Thread] |