Index: qemu/linux-user/main.c
===================================================================
--- qemu.orig/linux-user/main.c	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/main.c	2007-10-15 13:52:29.000000000 -0600
@@ -156,7 +156,7 @@
     p[1] = tswapl(e2);
 }
 
-uint64_t gdt_table[6];
+uint64_t gdt_table[9];
 uint64_t idt_table[256];
 
 /* only dpl matters as we do only user space emulation */
Index: qemu/linux-user/syscall.c
===================================================================
--- qemu.orig/linux-user/syscall.c	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/syscall.c	2007-10-15 13:52:30.000000000 -0600
@@ -183,6 +183,7 @@
 #define __NR_sys_symlinkat __NR_symlinkat
 #define __NR_sys_syslog __NR_syslog
 #define __NR_sys_tgkill __NR_tgkill
+#define __NR_sys_clone __NR_clone
 #define __NR_sys_tkill __NR_tkill
 #define __NR_sys_unlinkat __NR_unlinkat
 #define __NR_sys_utimensat __NR_utimensat
@@ -258,6 +259,7 @@
 #if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
 _syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig)
 #endif
+_syscall5(int,sys_clone,int,flags,void *,child_stack,int *,parent_tidptr,struct user_desc *,newtls,int *,child_tidptr)
 #if defined(TARGET_NR_tkill) && defined(__NR_tkill)
 _syscall2(int,sys_tkill,int,tid,int,sig)
 #endif
@@ -2416,6 +2418,81 @@
     return ret;
 }
 
+int do_set_thread_area(CPUX86State *env, target_ulong ptr)
+{
+    uint64_t *gdt_table = g2h(env->gdt.base);
+    struct target_modify_ldt_ldt_s ldt_info;
+    struct target_modify_ldt_ldt_s *target_ldt_info;
+    int seg_32bit, contents, read_exec_only, limit_in_pages;
+    int seg_not_present, useable;
+    uint32_t *lp, entry_1, entry_2;
+    int i;
+    SegmentCache *sc = &env->segs[R_GS];
+
+    lock_user_struct(target_ldt_info, ptr, 1);
+    ldt_info.entry_number = tswap32(target_ldt_info->entry_number);
+    ldt_info.base_addr = tswapl(target_ldt_info->base_addr);
+    ldt_info.limit = tswap32(target_ldt_info->limit);
+    ldt_info.flags = tswap32(target_ldt_info->flags);
+    if (ldt_info.entry_number == -1) {
+        for (i = 6; i < 9; i++)
+            if (gdt_table[i] == 0) {
+                ldt_info.entry_number = i;
+                target_ldt_info->entry_number = tswap32(i);
+                break;
+            }
+    }
+    unlock_user_struct(target_ldt_info, ptr, 0);
+
+    if (ldt_info.entry_number < 6 || ldt_info.entry_number > 8)
+        return -EINVAL;
+    seg_32bit = ldt_info.flags & 1;
+    contents = (ldt_info.flags >> 1) & 3;
+    read_exec_only = (ldt_info.flags >> 3) & 1;
+    limit_in_pages = (ldt_info.flags >> 4) & 1;
+    seg_not_present = (ldt_info.flags >> 5) & 1;
+    useable = (ldt_info.flags >> 6) & 1;
+
+    if (contents == 3) {
+        if (seg_not_present == 0)
+            return -EINVAL;
+    }
+
+    /* NOTE: same code as Linux kernel */
+    /* Allow LDTs to be cleared by the user. */
+    if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+        if ((contents == 0             &&
+             read_exec_only == 1       &&
+             seg_32bit == 0            &&
+             limit_in_pages == 0       &&
+             seg_not_present == 1      &&
+             useable == 0 )) {
+            entry_1 = 0;
+            entry_2 = 0;
+            goto install;
+        }
+    }
+
+    entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+        (ldt_info.limit & 0x0ffff);
+    entry_2 = (ldt_info.base_addr & 0xff000000) |
+        ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+        (ldt_info.limit & 0xf0000) |
+        ((read_exec_only ^ 1) << 9) |
+        (contents << 10) |
+        ((seg_not_present ^ 1) << 15) |
+        (seg_32bit << 22) |
+        (limit_in_pages << 23) |
+        (useable << 20) |
+        0x7000;
+
+    /* Install the new entry ...  */
+install:
+    lp = (uint32_t *)(gdt_table + ldt_info.entry_number);
+    lp[0] = tswap32(entry_1);
+    lp[1] = tswap32(entry_2);
+    return 0;
+}
 #endif /* defined(TARGET_I386) */
 
 /* this stack is the equivalent of the kernel stack associated with a
@@ -2426,40 +2503,62 @@
 static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED;
 #endif
 
-static int clone_func(void *arg)
+static int clone_func(CPUState *cloneenv)
 {
-    CPUState *env = arg;
 #ifdef USE_NPTL
     /* Wait until the parent has finshed initializing the tls state.  */
     while (!spin_trylock(&nptl_lock))
         usleep(1);
     spin_unlock(&nptl_lock);
 #endif
-    cpu_loop(env);
+    cpu_loop(cloneenv);
     /* never exits */
     return 0;
 }
 
+#ifdef __ia64__
+#define clone(...) __clone2(__VA_ARGS__)
+#endif
+
 /* do_fork() Must return host values and target errnos (unlike most
  * do_*() functions).
  */
 int do_fork(CPUState *env, unsigned int flags, target_ulong newsp,
-            uint32_t *parent_tidptr, void *newtls,
-            uint32_t *child_tidptr)
+            target_ulong parent_tidptr, target_ulong newtls,
+            target_ulong child_tidptr)
 {
     int ret;
     TaskState *ts;
     uint8_t *new_stack;
     CPUState *new_env;
 #ifdef USE_NPTL
+    unsigned long parent_tid = gettid();
+#if defined(TARGET_I386)
+    uint64_t *new_gdt_table;
+#endif
     unsigned int nptl_flags;
 
-    if (flags & CLONE_PARENT_SETTID)
-        *parent_tidptr = gettid();
+    /* check for invalid combinations */
+    if (((flags & CLONE_PARENT_SETTID) && !parent_tidptr)
+        || ((flags & CLONE_CHILD_SETTID) && !child_tidptr))
+        return -EINVAL;
+
+    if ((flags & CLONE_CHILD_SETTID)
+        && !access_ok(VERIFY_WRITE, child_tidptr, sizeof(target_ulong)))
+        return -EFAULT;
+
+    if ((flags & CLONE_PARENT_SETTID)
+        && !access_ok(VERIFY_WRITE, parent_tidptr, sizeof(target_ulong)))
+        return -EFAULT;
+
+    if (flags & CLONE_PARENT_SETTID)
+        tput32(parent_tidptr, parent_tid);
 #endif
 
     if (flags & CLONE_VM) {
         ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
+        if (!ts)
+            return -ENOMEM;
         memset(ts, 0, sizeof(TaskState));
         new_stack = ts->stack;
         ts->used = 1;
@@ -2471,6 +2570,31 @@
 #if defined(TARGET_I386)
         if (!newsp)
             newsp = env->regs[R_ESP];
+#ifdef USE_NPTL
+        new_gdt_table = malloc(9 * 8);
+        if (!new_gdt_table) {
+            free(ts);
+            free(new_env);
+            return -ENOMEM;
+        }
+        /* Copy main GDT table from parent, but clear TLS entries */
+        memcpy(new_gdt_table, g2h(env->gdt.base), 6 * 8);
+        memset(&new_gdt_table[6], 0, 3 * 8);
+        new_env->gdt.base = h2g(new_gdt_table);
+        if (flags & CLONE_SETTLS) {
+            ret = do_set_thread_area(new_env, newtls);
+            if (ret) {
+                free(ts);
+                free(new_env);
+                free(new_gdt_table);
+                return ret;
+            }
+        }
+#endif /* USE_NPTL */
+
+        cpu_x86_load_seg(new_env, R_FS, new_env->segs[R_FS].selector);
+        cpu_x86_load_seg(new_env, R_GS, new_env->segs[R_GS].selector);
+
         new_env->regs[R_ESP] = newsp;
         new_env->regs[R_EAX] = 0;
 #elif defined(TARGET_ARM)
@@ -2540,18 +2664,22 @@
         spin_lock(&nptl_lock);
 #else
-        if (flags & CLONE_NPTL_FLAGS2)
+        if (flags & CLONE_NPTL_FLAGS2) {
+            free(ts);
+            free(new_env);
+#ifdef USE_NPTL
+            free(new_gdt_table);
+#endif
             return -EINVAL;
+        }
 #endif
 
-#ifdef __ia64__
-        ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
-#else
         ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
-#endif
 #ifdef USE_NPTL
-        if (ret != -1) {
-            if (nptl_flags & CLONE_CHILD_SETTID)
-                *child_tidptr = ret;
+        if (ret != -1 && (nptl_flags & CLONE_CHILD_SETTID)) {
+            if (ret == 0) /* only in client memory for fork() */
+                tput32(child_tidptr, gettid());
+            else if (flags & CLONE_VM) /* real threads need it too */
+                tput32(child_tidptr, ret);
         }
 
         /* Allow the child to continue.  */
@@ -2562,7 +2690,7 @@
         /* if no CLONE_VM, we consider it is a fork */
         if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0)
             return -EINVAL;
-        ret = fork();
+        ret = sys_clone(flags & ~CLONE_NPTL_FLAGS2, 0, g2h(parent_tidptr), NULL, g2h(child_tidptr));
 #ifdef USE_NPTL
         /* There is a race condition here.  The parent process could
            theoretically read the TID in the child process before the child
@@ -2570,10 +2698,13 @@
            (not implemented) or having *_tidptr to point at a shared memory
            mapping.  We can't repeat the spinlock hack used above because
            the child process gets its own copy of the lock.  */
+        if (flags & CLONE_CHILD_SETTID) {
+            if (ret == 0)
+                tput32(child_tidptr, gettid());
+            else if (flags & CLONE_VM)
+                tput32(child_tidptr, ret);
+        }
         if (ret == 0) {
-            /* Child Process.  */
-            if (flags & CLONE_CHILD_SETTID)
-                *child_tidptr = gettid();
             ts = (TaskState *)env->opaque;
             if (flags & CLONE_CHILD_CLEARTID)
                 ts->child_tidptr = child_tidptr;
@@ -2585,6 +2716,10 @@
     return ret;
 }
 
+#ifdef __ia64__
+#undef clone
+#endif
+
 static target_long do_fcntl(int fd, int cmd, target_ulong arg)
 {
     struct flock fl;
@@ -2940,7 +3075,7 @@
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
-        /* XXX: should free thread stack and CPU env */
+        /* XXX: should free thread stack, GDT and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
         break;
@@ -5420,12 +5555,12 @@
 #ifdef TARGET_NR_set_thread_area
     case TARGET_NR_set_thread_area:
 #ifdef TARGET_MIPS
-      ((CPUMIPSState *) cpu_env)->tls_value = arg1;
-      ret = 0;
-      break;
+        ((CPUMIPSState *) cpu_env)->tls_value = arg1;
+        ret = 0;
 #else
-      goto unimplemented_nowarn;
+        ret = get_errno(do_set_thread_area(cpu_env, arg1));
 #endif
+        break;
 #endif
 #ifdef TARGET_NR_get_thread_area
     case TARGET_NR_get_thread_area:
Index: qemu/configure
===================================================================
--- qemu.orig/configure	2007-10-15 13:52:13.000000000 -0600
+++ qemu/configure	2007-10-15 13:52:13.000000000 -0600
@@ -1153,7 +1153,7 @@
 else
   if test "$nptl" = "yes" ; then
     case "$target_cpu" in
-      arm | armeb | ppc | ppc64)
+      arm | armeb | i386 | ppc | ppc64)
         echo "#define USE_NPTL 1" >> $config_h
         ;;
     esac
Index: qemu/target-i386/cpu.h
===================================================================
--- qemu.orig/target-i386/cpu.h	2007-10-15 13:52:06.000000000 -0600
+++ qemu/target-i386/cpu.h	2007-10-15 13:52:13.000000000 -0600
@@ -567,6 +567,9 @@
 int cpu_get_pic_interrupt(CPUX86State *s);
 /* MSDOS compatibility mode FPU exception support */
 void cpu_set_ferr(CPUX86State *s);
+#if defined(USE_NPTL)
+#define cpu_set_tls(...) do {} while(0)
+#endif
 
 /* this function must always be used to load data in the segment cache: it
    synchronizes the hflags with the segment cache values */
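
For reference, the guest program below is one way to smoke-test the new code path under
qemu-i386.  It is not part of the patch, and the file name and build line are only
suggestions.  An NPTL guest libc issues set_thread_area while setting up the main
thread's TLS and clone(CLONE_SETTLS | CLONE_CHILD_SETTID | ...) from pthread_create(),
so every access to the __thread variable goes through a GDT TLS descriptor installed by
do_set_thread_area().  Before this change, TARGET_NR_set_thread_area on i386 ended in
goto unimplemented_nowarn (silent ENOSYS), which an NPTL libc typically treats as fatal
during startup.

/* tls-test.c - minimal NPTL/TLS smoke test for the i386 changes above.
 * Build as a 32-bit guest binary, e.g.:
 *   gcc -m32 -O2 -o tls-test tls-test.c -lpthread
 * and run it with qemu-i386. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static __thread long tls_counter;       /* one copy per thread */

static void *worker(void *arg)
{
    tls_counter = (long)arg;            /* writes this thread's copy only */
    printf("thread %ld: tls_counter = %ld\n", (long)arg, tls_counter);
    return NULL;
}

int main(void)
{
    pthread_t th[4];
    long i;

    tls_counter = -1;                   /* main thread's copy */
    for (i = 0; i < 4; i++) {
        if (pthread_create(&th[i], NULL, worker, (void *)i)) {
            fprintf(stderr, "pthread_create failed\n");
            exit(1);
        }
    }
    for (i = 0; i < 4; i++)
        pthread_join(th[i], NULL);

    /* must still be -1 if each thread really got its own TLS area */
    printf("main: tls_counter = %ld (expected -1)\n", tls_counter);
    return tls_counter == -1 ? 0 : 1;
}

If the per-thread GDT copy or the CLONE_SETTLS handling were wrong, the threads would
clobber each other's (or main's) tls_counter, so the final value printed by main is a
quick sanity check.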