[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point
From: Samuel Thibault
Subject: Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point
Date: Mon, 27 Feb 2023 23:02:13 +0100
User-agent: NeoMutt/20170609 (1.8.3)
Luca Dariz, le lun. 27 févr. 2023 21:45:01 +0100, a ecrit:
> diff --git a/i386/i386/ldt.h b/i386/i386/ldt.h
> index b15f11a5..4490f99f 100644
> --- a/i386/i386/ldt.h
> +++ b/i386/i386/ldt.h
> @@ -45,9 +45,14 @@
> #define USER_SCALL 0x07 /* system call gate */
> #ifdef __x86_64__
> /* Call gate needs two entries */
> -#endif
> +
> +/* The sysret instruction puts some constraints on the user segment indexes */
> +#define USER_CS 0x1f /* user code segment */
> +#define USER_DS 0x17 /* user data segment */
I'd say we'd rather avoid changing them for the x86_64 && USER32 case?
> +#else
> #define USER_CS 0x17 /* user code segment */
> #define USER_DS 0x1f /* user data segment */
> +#endif
>
> #define LDTSZ 4
>
> diff --git a/i386/include/mach/i386/syscall_sw.h
> b/i386/include/mach/i386/syscall_sw.h
> index 86f6ff2f..20ef7c13 100644
> --- a/i386/include/mach/i386/syscall_sw.h
> +++ b/i386/include/mach/i386/syscall_sw.h
> @@ -29,16 +29,16 @@
>
> #include <mach/machine/asm.h>
>
> -#if BSD_TRAP
> -#define kernel_trap(trap_name,trap_number,number_args) \
> -ENTRY(trap_name) \
> - movl $ trap_number,%eax; \
> - SVC; \
> - jb LCL(cerror); \
> - ret; \
> +#if defined(__x86_64__) && ! defined(USER32)
> +#define kernel_trap(trap_name,trap_number,number_args) \
> +ENTRY(trap_name) \
> + movq $ trap_number,%rax; \
> + movq %rcx,%r10; \
What is that for?
> + syscall; \
> + ret; \
> END(trap_name)
> #else
> -#define kernel_trap(trap_name,trap_number,number_args) \
> +#define kernel_trap(trap_name,trap_number,number_args) \
> ENTRY(trap_name) \
> movl $ trap_number,%eax; \
> SVC; \
> diff --git a/x86_64/locore.S b/x86_64/locore.S
> index 47d9085c..fdf7300b 100644
> --- a/x86_64/locore.S
> +++ b/x86_64/locore.S
> @@ -1281,6 +1281,142 @@ DATA(cpu_features_ecx)
>
> END(syscall)
>
> +
> +/* Entry point for 64-bit syscalls.
> + * On entry we're still on the user stack, so better not use it. Instead we
> + * save the thread state immediately in thread->pcb->iss, then try to invoke
> + * the syscall.
> + * TODO:
> + - for now we assume the return address is canonical, but apparently there
> + can be cases where it's not (see how Linux handles this). Does it apply
> + here?
> + - do we need to check for ast on syscalls? Maybe on interrupts is enough
> + - check that the case where a task is suspended, and later returns via
> + iretq from return_from_trap, works fine in all combinations
> + - emulated syscalls - are they used anywhere?
Not that I know of.
> + */
> +ENTRY(syscall64)
> + /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and
> + * eflags in RAX to allow using r11 as temporary register */
> + shlq $32,%r11
> + shlq $32,%rax /* make sure bits 32:63 of %rax are zero */
> + shrq $32,%rax
> + or %r11,%rax
> +
> + /* Save thread state in pcb->iss, as on exception entry.
> + * Since this is triggered synchronously from userspace, we can
> + * save only the callee-preserved status according to the C ABI,
> + * plus RIP and EFLAGS for sysret */
> + CPU_NUMBER(%r11)
> + movq CX(EXT(active_threads),%r11),%r11 /* point to current thread */
> + movq TH_PCB(%r11),%r11 /* point to pcb */
> + addq $ PCB_ISS,%r11 /* point to saved state */
> +
> + mov %gs,R_GS(%r11)
> + mov %fs,R_FS(%r11)
> + mov %rsp,R_UESP(%r11) /* callee-preserved register */
> + mov %rcx,R_EIP(%r11) /* syscall places user RIP in RCX */
> + mov %rbx,R_EBX(%r11) /* callee-preserved register */
> + mov %rax,%rbx /* Now we can unpack eflags again */
> + shr $32,%rbx
> + mov %rbx,R_EFLAGS(%r11) /* ... and save them in pcb as well */
> + mov %rbp,R_EBP(%r11) /* callee-preserved register */
> + mov %r12,R_R12(%r11) /* callee-preserved register */
> + mov %r13,R_R13(%r11) /* callee-preserved register */
> + mov %r14,R_R14(%r11) /* callee-preserved register */
> + mov %r15,R_R15(%r11) /* callee-preserved register */
> + mov %r11,%rbx /* prepare for error handling */
> + mov %r10,%rcx /* fix arg3 location according to C ABI */
> +
> + /* switch to kernel stack */
> + CPU_NUMBER(%r11)
> + movq CX(EXT(kernel_stack),%r11),%rsp
> +
> + /* Now we have saved state and args 1-6 are in place.
> + * Before invoking the syscall we do some bound checking and,
> + * if we have more than 6 arguments, we need to copy the
> + * remaining ones to the kernel stack, handling page faults when
> + * accessing the user stack.
> + */
> + shlq $32,%rax /* make sure bits 32:63 of %rax are zero */
> + shrq $32,%rax
> + negl %eax /* get system call number */
> + jl _syscall64_range /* out of range if it was positive */
> + cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
> + jg _syscall64_range /* error if out of range */
> + shll $5,%eax /* manual indexing of mach_trap_t */
> +
> + /* check if we need to place some arguments on the stack */
> +_syscall64_args_stack:
> + mov EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */
> + subq $6,%r10 /* the first 6 args are already in place */
> + jl _syscall64_call /* skip argument copy if <6 args */
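jle? With exactly 6 arguments %r10 is 0 after the subq, so there is nothing to copy; falling through would enter the copy loop with a zero count, and the dec/jnz would wrap around instead of terminating.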
> +
> + movq R_UESP(%rbx),%r11 /* get user stack pointer */
> + addq $8,%r11 /* Skip user return address */
> +
> + mov $USER_DS,%r12 /* use user data segment for accesses */
> + mov %r12,%fs
> +
> + lea (%r11,%r10,8),%r11 /* point past last argument */
> + xorq %r12,%r12
Why clearing it?
> +0: subq $8,%r11
> + RECOVER(_syscall64_addr_push)
> + mov %fs:(%r11),%r12
> + pushq %r12 /* push argument on stack */
> + dec %r10
> + jnz 0b /* loop for all remaining arguments */
> +
> +_syscall64_call:
> + call *EXT(mach_trap_table)+8(%rax) /* call procedure */
> + // XXX: check ast on exit?
> +
> + /* avoid leaking information in callee-clobbered registers */
> + mov $0,%rdi
Rather xorq?
> + mov $0,%rsi
> + mov $0,%rdx
> + mov $0,%r10
> + mov $0,%r9
> + mov $0,%r8
> +
> + /* restore thread state and return to user using sysret */
> + CPU_NUMBER(%r11)
> + movq CX(EXT(active_threads),%r11),%r11 /* point to current thread */
> + movq TH_PCB(%r11),%r11 /* point to pcb */
> + addq $ PCB_ISS,%r11 /* point to saved state */
> +
> + mov R_GS(%r11),%gs
> + mov R_FS(%r11),%fs
> + mov R_UESP(%r11),%rsp /* callee-preserved register,
> + * switch to user stack */
> + mov R_EIP(%r11),%rcx /* sysret convention */
> + mov R_EBX(%r11),%rbx /* callee-preserved register */
> + mov R_EBP(%r11),%rbp /* callee-preserved register */
> + mov R_R12(%r11),%r12 /* callee-preserved register */
> + mov R_R13(%r11),%r13 /* callee-preserved register */
> + mov R_R14(%r11),%r14 /* callee-preserved register */
> + mov R_R15(%r11),%r15 /* callee-preserved register */
> + mov R_EFLAGS(%r11),%r11 /* sysret convention */
> +
> + sysretq /* fast return to user-space, the thread didn't block */
> +
> +/* Error handling fragments, from here we jump directly to the trap handler */
> +_syscall64_addr_push:
> + movq %rbx,%rsp /* clean parameters from stack */
> + movq %r11,R_CR2(%rbx) /* set fault address */
> + movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) /* set page-fault trap */
> + movq $(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */
> + jmp _take_trap /* treat as a trap */
> +
> +_syscall64_range:
> + movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
> + /* set invalid-operation trap */
> + movq $0,R_ERR(%rbx) /* clear error code */
> + jmp _take_trap /* treat as a trap */
> +
> +END(syscall64)
> +
> /* Discover what kind of cpu we have; return the family number
> (3, 4, 5, 6, for 386, 486, 586, 686 respectively). */
> ENTRY(discover_x86_cpu_type)
> --
> 2.30.2
>
>
--
Samuel
---
Pour une évaluation indépendante, transparente et rigoureuse !
Je soutiens la Commission d'Évaluation de l'Inria.
- [PATCH 0/5] basic syscall support on x86_64, Luca Dariz, 2023/02/27
- [PATCH 4/5] x86_64: fix user trap during syscall with an invalid user stack, Luca Dariz, 2023/02/27
- [PATCH 3/5] fix port name copyin, Luca Dariz, 2023/02/27
- [PATCH 1/5] x86_64: allow compilation if ! USER32, Luca Dariz, 2023/02/27
- [PATCH 2/5] fix copyin/outmsg header for ! USER32, Luca Dariz, 2023/02/27
- [PATCH 5/5] x86_64: add 64-bit syscall entry point, Luca Dariz, 2023/02/27
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point,
Samuel Thibault <=
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Luca Dariz, 2023/02/28
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Samuel Thibault, 2023/02/28
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Sergey Bugaev, 2023/02/28
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Samuel Thibault, 2023/02/28
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Sergey Bugaev, 2023/02/28
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Samuel Thibault, 2023/02/28
- Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Sergey Bugaev, 2023/02/28
Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point, Sergey Bugaev, 2023/02/28