qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [patch] NPTL/TLS support


From: Paul Brook
Subject: [Qemu-devel] [patch] NPTL/TLS support
Date: Sun, 14 Aug 2005 16:53:25 +0100
User-agent: KMail/1.7.2

The attached patch adds partial support for ARM NPTL binaries.

It implements the following things:

- The Arm magic kernel code page. This is used on recent Arm kernels to 
provide efficient access to kernel/CPU features. For example atomic 
operations, and reading the TLS register (which may or may not be a real 
hardware register). This is a simplified version of the VDSO used on recent x86 
and ppc kernels.
- A new Arm specific syscall to set the TLS register.
- Support for the 5-argument form of sys_clone.

This is sufficient to run single-threaded NPTL enabled binaries. I've not yet 
implemented the futex syscalls, so multithreaded applications probably won't 
work.

NPTL support is only enabled for ARM.  Other architectures may need 
architecture specific bits implementing before they can be enabled.

Paul
Index: configure
===================================================================
RCS file: /cvsroot/qemu/qemu/configure,v
retrieving revision 1.73
diff -u -p -r1.73 configure
--- configure   28 Jul 2005 21:45:38 -0000      1.73
+++ configure   14 Aug 2005 14:40:29 -0000
@@ -85,6 +85,7 @@ kqemu="no"
 kernel_path=""
 cocoa="no"
 check_gfx="yes"
+nptl="yes"
 
 # OS specific
 targetos=`uname -s`
@@ -191,6 +192,8 @@ for opt do
   ;; 
   --disable-gfx-check) check_gfx="no"
   ;;
+  --disable-nptl) nptl="no"
+  ;;
   esac
 done
 
@@ -349,6 +352,7 @@ echo "  --enable-adlib           enable 
 echo "  --enable-fmod            enable FMOD audio output driver"
 echo "  --fmod-lib               path to FMOD library"
 echo "  --fmod-inc               path to FMOD includes"
+echo "  --disable-nptl           Disable NPTL user-mode emulation"
 echo ""
 echo "NOTE: The object files are build at the place where configure is 
launched"
 exit 1
@@ -736,6 +740,14 @@ if test "$target_user_only" = "no"; then
             echo -n " `aalib-config --cflags`" >> $config_mak ;
         fi
         echo "" >> $config_mak
+    fi
+else
+    if test "$nptl" = "yes" ; then
+        case "$target_cpu" in
+          arm | armeb)
+            echo "#define USE_NPTL 1" >> $config_h
+          ;;
+        esac
     fi
 fi
 
Index: exec-all.h
===================================================================
RCS file: /cvsroot/qemu/qemu/exec-all.h,v
retrieving revision 1.34
diff -u -p -r1.34 exec-all.h
--- exec-all.h  24 Jul 2005 14:14:53 -0000      1.34
+++ exec-all.h  14 Aug 2005 14:40:29 -0000
@@ -374,163 +374,7 @@ extern CPUWriteMemoryFunc *io_mem_write[
 extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
 extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
 
-#ifdef __powerpc__
-static inline int testandset (int *p)
-{
-    int ret;
-    __asm__ __volatile__ (
-                          "0:    lwarx %0,0,%1\n"
-                          "      xor. %0,%3,%0\n"
-                          "      bne 1f\n"
-                          "      stwcx. %2,0,%1\n"
-                          "      bne- 0b\n"
-                          "1:    "
-                          : "=&r" (ret)
-                          : "r" (p), "r" (1), "r" (0)
-                          : "cr0", "memory");
-    return ret;
-}
-#endif
-
-#ifdef __i386__
-static inline int testandset (int *p)
-{
-    long int readval = 0;
-    
-    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
-                          : "+m" (*p), "+a" (readval)
-                          : "r" (1)
-                          : "cc");
-    return readval;
-}
-#endif
-
-#ifdef __x86_64__
-static inline int testandset (int *p)
-{
-    long int readval = 0;
-    
-    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
-                          : "+m" (*p), "+a" (readval)
-                          : "r" (1)
-                          : "cc");
-    return readval;
-}
-#endif
-
-#ifdef __s390__
-static inline int testandset (int *p)
-{
-    int ret;
-
-    __asm__ __volatile__ ("0: cs    %0,%1,0(%2)\n"
-                         "   jl    0b"
-                         : "=&d" (ret)
-                         : "r" (1), "a" (p), "0" (*p) 
-                         : "cc", "memory" );
-    return ret;
-}
-#endif
-
-#ifdef __alpha__
-static inline int testandset (int *p)
-{
-    int ret;
-    unsigned long one;
-
-    __asm__ __volatile__ ("0:  mov 1,%2\n"
-                         "     ldl_l %0,%1\n"
-                         "     stl_c %2,%1\n"
-                         "     beq %2,1f\n"
-                         ".subsection 2\n"
-                         "1:   br 0b\n"
-                         ".previous"
-                         : "=r" (ret), "=m" (*p), "=r" (one)
-                         : "m" (*p));
-    return ret;
-}
-#endif
-
-#ifdef __sparc__
-static inline int testandset (int *p)
-{
-       int ret;
-
-       __asm__ __volatile__("ldstub    [%1], %0"
-                            : "=r" (ret)
-                            : "r" (p)
-                            : "memory");
-
-       return (ret ? 1 : 0);
-}
-#endif
-
-#ifdef __arm__
-static inline int testandset (int *spinlock)
-{
-    register unsigned int ret;
-    __asm__ __volatile__("swp %0, %1, [%2]"
-                         : "=r"(ret)
-                         : "0"(1), "r"(spinlock));
-    
-    return ret;
-}
-#endif
-
-#ifdef __mc68000
-static inline int testandset (int *p)
-{
-    char ret;
-    __asm__ __volatile__("tas %1; sne %0"
-                         : "=r" (ret)
-                         : "m" (p)
-                         : "cc","memory");
-    return ret;
-}
-#endif
-
-#ifdef __ia64
-#include <ia64intrin.h>
-
-static inline int testandset (int *p)
-{
-    return __sync_lock_test_and_set (p, 1);
-}
-#endif
-
-typedef int spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED 0
-
-#if defined(CONFIG_USER_ONLY)
-static inline void spin_lock(spinlock_t *lock)
-{
-    while (testandset(lock));
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-    *lock = 0;
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
-    return !testandset(lock);
-}
-#else
-static inline void spin_lock(spinlock_t *lock)
-{
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
-    return 1;
-}
-#endif
+#include "spinlock.h"
 
 extern spinlock_t tb_lock;
 
Index: spinlock.h
===================================================================
RCS file: spinlock.h
diff -N spinlock.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ spinlock.h  14 Aug 2005 14:40:29 -0000
@@ -0,0 +1,181 @@
+/*
+ * Atomic operation helper include
+ * 
+ *  Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef SPINLOCK_H
+#define SPINLOCK_H
+
+#ifdef __powerpc__
+static inline int testandset (int *p)
+{
+    int ret;
+    __asm__ __volatile__ (
+                          "0:    lwarx %0,0,%1\n"
+                          "      xor. %0,%3,%0\n"
+                          "      bne 1f\n"
+                          "      stwcx. %2,0,%1\n"
+                          "      bne- 0b\n"
+                          "1:    "
+                          : "=&r" (ret)
+                          : "r" (p), "r" (1), "r" (0)
+                          : "cr0", "memory");
+    return ret;
+}
+#endif
+
+#ifdef __i386__
+static inline int testandset (int *p)
+{
+    long int readval = 0;
+    
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+                          : "+m" (*p), "+a" (readval)
+                          : "r" (1)
+                          : "cc");
+    return readval;
+}
+#endif
+
+#ifdef __x86_64__
+static inline int testandset (int *p)
+{
+    long int readval = 0;
+    
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+                          : "+m" (*p), "+a" (readval)
+                          : "r" (1)
+                          : "cc");
+    return readval;
+}
+#endif
+
+#ifdef __s390__
+static inline int testandset (int *p)
+{
+    int ret;
+
+    __asm__ __volatile__ ("0: cs    %0,%1,0(%2)\n"
+                         "   jl    0b"
+                         : "=&d" (ret)
+                         : "r" (1), "a" (p), "0" (*p) 
+                         : "cc", "memory" );
+    return ret;
+}
+#endif
+
+#ifdef __alpha__
+static inline int testandset (int *p)
+{
+    int ret;
+    unsigned long one;
+
+    __asm__ __volatile__ ("0:  mov 1,%2\n"
+                         "     ldl_l %0,%1\n"
+                         "     stl_c %2,%1\n"
+                         "     beq %2,1f\n"
+                         ".subsection 2\n"
+                         "1:   br 0b\n"
+                         ".previous"
+                         : "=r" (ret), "=m" (*p), "=r" (one)
+                         : "m" (*p));
+    return ret;
+}
+#endif
+
+#ifdef __sparc__
+static inline int testandset (int *p)
+{
+       int ret;
+
+       __asm__ __volatile__("ldstub    [%1], %0"
+                            : "=r" (ret)
+                            : "r" (p)
+                            : "memory");
+
+       return (ret ? 1 : 0);
+}
+#endif
+
+#ifdef __arm__
+static inline int testandset (int *spinlock)
+{
+    register unsigned int ret;
+    __asm__ __volatile__("swp %0, %1, [%2]"
+                         : "=r"(ret)
+                         : "0"(1), "r"(spinlock));
+    
+    return ret;
+}
+#endif
+
+#ifdef __mc68000
+static inline int testandset (int *p)
+{
+    char ret;
+    __asm__ __volatile__("tas %1; sne %0"
+                         : "=r" (ret)
+                         : "m" (p)
+                         : "cc","memory");
+    return ret;
+}
+#endif
+
+#ifdef __ia64
+#include <ia64intrin.h>
+
+static inline int testandset (int *p)
+{
+    return __sync_lock_test_and_set (p, 1);
+}
+#endif
+
+typedef int spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+
+#if defined(CONFIG_USER_ONLY)
+static inline void spin_lock(spinlock_t *lock)
+{
+    while (testandset(lock));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+    *lock = 0;
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return !testandset(lock);
+}
+#else
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return 1;
+}
+#endif
+
+#endif
Index: linux-user/arm-semi.c
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/arm-semi.c,v
retrieving revision 1.3
diff -u -p -r1.3 arm-semi.c
--- linux-user/arm-semi.c       13 May 2005 22:42:37 -0000      1.3
+++ linux-user/arm-semi.c       14 Aug 2005 14:40:29 -0000
@@ -178,7 +178,7 @@ uint32_t do_arm_semihosting(CPUState *en
                 ts->heap_limit = limit;
             }
               
-            ptr = (uint32_t *)tswap32(ARG(0));
+            ptr = (uint32_t *)ARG(0);
             ptr[0] = tswap32(ts->heap_base);
             ptr[1] = tswap32(ts->heap_limit);
             ptr[2] = tswap32(ts->stack_base);
Index: linux-user/elfload.c
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/elfload.c,v
retrieving revision 1.26
diff -u -p -r1.26 elfload.c
--- linux-user/elfload.c        10 Feb 2005 22:00:52 -0000      1.26
+++ linux-user/elfload.c        14 Aug 2005 14:40:29 -0000
@@ -186,19 +186,17 @@ do {                                    
  * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
  *   even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
  */
-#define DLINFO_ARCH_ITEMS       3
+#define DLINFO_ARCH_ITEMS       5
 #define ARCH_DLINFO                                                     \
 do {                                                                    \
-       sp -= DLINFO_ARCH_ITEMS * 2;                                    \
-        NEW_AUX_ENT(0, AT_DCACHEBSIZE, 0x20);                           \
-        NEW_AUX_ENT(1, AT_ICACHEBSIZE, 0x20);                           \
-        NEW_AUX_ENT(2, AT_UCACHEBSIZE, 0);                              \
+        NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20);                              \
+        NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20);                              \
+        NEW_AUX_ENT(AT_UCACHEBSIZE, 0);                                 \
         /*                                                              \
          * Now handle glibc compatibility.                              \
          */                                                             \
-       sp -= 2*2;                                                      \
-       NEW_AUX_ENT(0, AT_IGNOREPPC, AT_IGNOREPPC);                     \
-       NEW_AUX_ENT(1, AT_IGNOREPPC, AT_IGNOREPPC);                     \
+       NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);                        \
+       NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);                        \
  } while (0)
 
 static inline void init_thread(struct target_pt_regs *_regs, struct image_info 
*infop)
@@ -643,24 +641,24 @@ static unsigned int * create_elf_tables(
         if ((unsigned long)csp & 15UL)
             sp -= ((unsigned long)csp & 15UL) / sizeof(*sp);
         
-#define NEW_AUX_ENT(nr, id, val) \
-          put_user (id, sp + (nr * 2)); \
-          put_user (val, sp + (nr * 2 + 1))
-        sp -= 2;
-        NEW_AUX_ENT (0, AT_NULL, 0);
-
-       sp -= DLINFO_ITEMS*2;
-        NEW_AUX_ENT( 0, AT_PHDR, (target_ulong)(load_addr + exec->e_phoff));
-        NEW_AUX_ENT( 1, AT_PHENT, (target_ulong)(sizeof (struct elf_phdr)));
-        NEW_AUX_ENT( 2, AT_PHNUM, (target_ulong)(exec->e_phnum));
-        NEW_AUX_ENT( 3, AT_PAGESZ, (target_ulong)(TARGET_PAGE_SIZE));
-        NEW_AUX_ENT( 4, AT_BASE, (target_ulong)(interp_load_addr));
-        NEW_AUX_ENT( 5, AT_FLAGS, (target_ulong)0);
-        NEW_AUX_ENT( 6, AT_ENTRY, load_bias + exec->e_entry);
-        NEW_AUX_ENT( 7, AT_UID, (target_ulong) getuid());
-        NEW_AUX_ENT( 8, AT_EUID, (target_ulong) geteuid());
-        NEW_AUX_ENT( 9, AT_GID, (target_ulong) getgid());
-        NEW_AUX_ENT(11, AT_EGID, (target_ulong) getegid());
+#define NEW_AUX_ENT(id, val) \
+          sp -= 2; \
+          put_user (id, sp); \
+          put_user (val, sp + 1)
+        NEW_AUX_ENT (AT_NULL, 0);
+
+        /* There must be exactly DLINFO_ITEMS entries here.  */
+        NEW_AUX_ENT(AT_PHDR, (target_ulong)(load_addr + exec->e_phoff));
+        NEW_AUX_ENT(AT_PHENT, (target_ulong)(sizeof (struct elf_phdr)));
+        NEW_AUX_ENT(AT_PHNUM, (target_ulong)(exec->e_phnum));
+        NEW_AUX_ENT(AT_PAGESZ, (target_ulong)(TARGET_PAGE_SIZE));
+        NEW_AUX_ENT(AT_BASE, (target_ulong)(interp_load_addr));
+        NEW_AUX_ENT(AT_FLAGS, (target_ulong)0);
+        NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry);
+        NEW_AUX_ENT(AT_UID, (target_ulong) getuid());
+        NEW_AUX_ENT(AT_EUID, (target_ulong) geteuid());
+        NEW_AUX_ENT(AT_GID, (target_ulong) getgid());
+        NEW_AUX_ENT(AT_EGID, (target_ulong) getegid());
 #ifdef ARCH_DLINFO
        /* 
         * ARCH_DLINFO must come last so platform specific code can enforce
Index: linux-user/main.c
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/main.c,v
retrieving revision 1.70
diff -u -p -r1.70 main.c
--- linux-user/main.c   24 Jul 2005 18:44:56 -0000      1.70
+++ linux-user/main.c   14 Aug 2005 14:40:30 -0000
@@ -326,6 +326,59 @@ static void arm_cache_flush(target_ulong
     }
 }
 
+/* Handle a jump to the ARM kernel helper ("magic") code page.
+
+   Dispatches on the trapped PC (r15) and emulates the helper found there:
+     0xffff0fc0  __kernel_cmpxchg: r0 = old value, r1 = new value,
+                 r2 = pointer.  On success r0 = 0 and the C flag is set;
+                 otherwise r0 is non-zero and the C flag is clear.
+     0xffff0fe0  __kernel_get_tls: r0 = TLS value (USE_NPTL builds only).
+   Control then returns to the address in lr (r14), switching to Thumb
+   state if bit 0 of lr is set.
+   Returns 0 if a helper was emulated, 1 if the address is not recognised.  */
+static int
+do_kernel_trap(CPUARMState *env)
+{
+    uint32_t addr;
+    uint32_t *ptr;
+
+    switch (env->regs[15]) {
+    case 0xffff0fc0: /* __kernel_cmpxchg */
+        /* XXX: This only works between threads, not between processes.
+           Use native atomic operations.  */
+        cpu_lock();
+        ptr = (uint32_t *)env->regs[2];
+        if (*ptr == env->regs[0]) {
+            *ptr = env->regs[1];
+            env->regs[0] = 0;
+            env->cpsr |= (1 << 29);
+        } else {
+            env->regs[0] = -1;
+            /* Clear only bit 29 (C flag); leave the rest of CPSR intact.  */
+            env->cpsr &= ~(1 << 29);
+        }
+        cpu_unlock();
+        break;
+#ifdef USE_NPTL
+    case 0xffff0fe0: /* __kernel_get_tls */
+        /* env->tls is only declared when USE_NPTL is defined.  */
+        env->regs[0] = env->tls;
+        break;
+#endif
+    default:
+        return 1;
+    }
+    /* Jump back to the caller.  */
+    addr = env->regs[14];
+    if (addr & 1) {
+        env->thumb = 1;
+        addr &= ~1;
+    }
+    env->regs[15] = addr;
+
+    return 0;
+}
+
 void cpu_loop(CPUARMState *env)
 {
     int trapnr;
@@ -367,10 +407,8 @@ void cpu_loop(CPUARMState *env)
                     n = insn & 0xffffff;
                 }
 
-                if (n == ARM_NR_cacheflush) {
-                    arm_cache_flush(env->regs[0], env->regs[1]);
-                } else if (n == ARM_NR_semihosting
-                           || n == ARM_NR_thumb_semihosting) {
+                if (n == ARM_NR_semihosting
+                    || n == ARM_NR_thumb_semihosting) {
                     env->regs[0] = do_arm_semihosting (env);
                 } else if (n >= ARM_SYSCALL_BASE
                            || (env->thumb && n == ARM_THUMB_SYSCALL)) {
@@ -380,14 +418,36 @@ void cpu_loop(CPUARMState *env)
                     } else {
                         n -= ARM_SYSCALL_BASE;
                     }
-                    env->regs[0] = do_syscall(env, 
-                                              n, 
-                                              env->regs[0],
-                                              env->regs[1],
-                                              env->regs[2],
-                                              env->regs[3],
-                                              env->regs[4],
-                                              env->regs[5]);
+                    if ( n > ARM_NR_BASE) {
+                        switch (n)
+                          {
+                          case ARM_NR_cacheflush:
+                              arm_cache_flush(env->regs[0], env->regs[1]);
+                              break;
+#ifdef USE_NPTL
+                          case ARM_NR_set_tls:
+                              env->tls = env->regs[0];
+                              env->regs[0] = 0;
+                              break;
+#endif
+                          default:
+                              printf ("Error: %x\n", n);
+                              abort();
+                              env->regs[0] = -ENOSYS;
+                              break;
+                          }
+                      }
+                    else
+                      {
+                        env->regs[0] = do_syscall(env, 
+                                                  n, 
+                                                  env->regs[0],
+                                                  env->regs[1],
+                                                  env->regs[2],
+                                                  env->regs[3],
+                                                  env->regs[4],
+                                                  env->regs[5]);
+                      }
                 } else {
                     goto error;
                 }
@@ -421,6 +481,10 @@ void cpu_loop(CPUARMState *env)
                   }
             }
             break;
+        case EXCP_KERNEL_TRAP:
+            if (do_kernel_trap(env))
+              goto error;
+            break;
         default:
         error:
             fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n", 
@@ -1201,6 +1265,10 @@ int main(int argc, char **argv)
         ts->heap_base = info->brk;
         /* This will be filled in on the first SYS_HEAPINFO call.  */
         ts->heap_limit = 0;
+        /* Register the magic kernel code page.  The cpu will generate a
+           special exception when it tries to execute code here.  We can't
+           put real code here because it may be in use by the host kernel.  */
+        page_set_flags(0xffff0000, 0xffff0fff, 0);
     }
 #elif defined(TARGET_SPARC)
     {
Index: linux-user/qemu.h
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/qemu.h,v
retrieving revision 1.24
diff -u -p -r1.24 qemu.h
--- linux-user/qemu.h   23 Apr 2005 18:25:40 -0000      1.24
+++ linux-user/qemu.h   14 Aug 2005 14:40:30 -0000
@@ -76,6 +76,9 @@ typedef struct TaskState {
     uint32_t v86mask;
 #endif
     int used; /* non zero if used */
+#ifdef USE_NPTL
+    uint32_t *child_tidptr;
+#endif
     uint8_t stack[0];
 } __attribute__((aligned(16))) TaskState;
 
Index: linux-user/syscall.c
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/syscall.c,v
retrieving revision 1.62
diff -u -p -r1.62 syscall.c
--- linux-user/syscall.c        23 Jul 2005 15:10:20 -0000      1.62
+++ linux-user/syscall.c        14 Aug 2005 14:40:30 -0000
@@ -65,9 +65,18 @@
 #include <linux/kd.h>
 
 #include "qemu.h"
+#include "spinlock.h"
 
 //#define DEBUG
 
+#ifdef USE_NPTL
+#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
+    CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
+#else
+/* XXX: Hardcode the above values.  */
+#define CLONE_NPTL_FLAGS2 0
+#endif
+
 #if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_SPARC)
 /* 16 bit uid wrappers emulation */
 #define USE_UID16
@@ -1459,20 +1468,38 @@ int do_modify_ldt(CPUX86State *env, int 
    thread/process */
 #define NEW_STACK_SIZE 8192
 
+#ifdef USE_NPTL
+static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
 static int clone_func(void *arg)
 {
     CPUState *env = arg;
+#ifdef USE_NPTL
+    /* Wait until the parent has finished initializing the tls state.  */
+    while (!spin_trylock(&nptl_lock))
+        usleep(1);
+    spin_unlock(&nptl_lock);
+#endif
     cpu_loop(env);
     /* never exits */
     return 0;
 }
 
-int do_fork(CPUState *env, unsigned int flags, unsigned long newsp)
+int do_fork(CPUState *env, unsigned int flags, unsigned long newsp,
+            uint32_t *parent_tidptr, void *newtls,
+            uint32_t *child_tidptr)
 {
     int ret;
     TaskState *ts;
     uint8_t *new_stack;
     CPUState *new_env;
+#ifdef USE_NPTL
+    unsigned int nptl_flags;
+
+    if (flags & CLONE_PARENT_SETTID)
+        *parent_tidptr = gettid();
+#endif
     
     if (flags & CLONE_VM) {
         ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
@@ -1510,16 +1537,64 @@ int do_fork(CPUState *env, unsigned int 
 #error unsupported target CPU
 #endif
         new_env->opaque = ts;
+#ifdef USE_NPTL
+        nptl_flags = flags;
+        flags &= ~CLONE_NPTL_FLAGS2;
+
+        if (nptl_flags & CLONE_CHILD_CLEARTID) {
+            ts->child_tidptr = child_tidptr;
+        }
+
+        if (nptl_flags & CLONE_SETTLS)
+            cpu_set_tls (new_env, newtls);
+
+        /* Grab the global cpu lock so that the thread setup appears
+           atomic.  */
+        if (nptl_flags & CLONE_CHILD_SETTID)
+            spin_lock(&nptl_lock);
+
+#else
+        if (flags & CLONE_NPTL_FLAGS2)
+            return -EINVAL;
+#endif
 #ifdef __ia64__
         ret = clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
 #else
        ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
 #endif
+#ifdef USE_NPTL
+        if (ret != -1) {
+            if (nptl_flags & CLONE_CHILD_SETTID)
+                *child_tidptr = ret;
+        }
+
+        /* Allow the child to continue.  */
+        if (nptl_flags & CLONE_CHILD_SETTID)
+            spin_unlock(&nptl_lock);
+#endif
     } else {
         /* if no CLONE_VM, we consider it is a fork */
-        if ((flags & ~CSIGNAL) != 0)
+        if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0)
             return -EINVAL;
         ret = fork();
+#ifdef USE_NPTL
+        /* There is a race condition here.  The parent process could
+           theoretically read the TID in the child process before the child
+           tid is set.  This would require using either ptrace
+           (not implemented) or having *_tidptr to point at a shared memory
+           mapping.  We can't repeat the spinlock hack used above because
+           the child process gets its own copy of the lock.  */
+        if (ret == 0) {
+            /* Child Process.  */
+            if (flags & CLONE_CHILD_SETTID)
+                *child_tidptr = gettid();
+            ts = (TaskState *)env->opaque;
+            if (flags & CLONE_CHILD_CLEARTID)
+                ts->child_tidptr = child_tidptr;
+            if (flags & CLONE_SETTLS)
+                cpu_set_tls (env, newtls);
+        }
+#endif
     }
     return ret;
 }
@@ -1691,7 +1766,7 @@ long do_syscall(void *cpu_env, int num, 
         ret = do_brk((char *)arg1);
         break;
     case TARGET_NR_fork:
-        ret = get_errno(do_fork(cpu_env, SIGCHLD, 0));
+        ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, NULL, NULL, NULL));
         break;
     case TARGET_NR_waitpid:
         {
@@ -2469,7 +2544,8 @@ long do_syscall(void *cpu_env, int num, 
         ret = get_errno(fsync(arg1));
         break;
     case TARGET_NR_clone:
-        ret = get_errno(do_fork(cpu_env, arg1, arg2));
+        ret = get_errno(do_fork(cpu_env, arg1, arg2, (uint32_t *)arg3,
+                        (void *)arg4, (uint32_t *)arg5));
         break;
 #ifdef __NR_exit_group
         /* new thread calls */
@@ -2803,7 +2879,8 @@ long do_syscall(void *cpu_env, int num, 
         goto unimplemented;
 #endif
     case TARGET_NR_vfork:
-        ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0));
+        ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+                                NULL, NULL, NULL));
         break;
 #ifdef TARGET_NR_ugetrlimit
     case TARGET_NR_ugetrlimit:
Index: linux-user/arm/syscall.h
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/arm/syscall.h,v
retrieving revision 1.6
diff -u -p -r1.6 syscall.h
--- linux-user/arm/syscall.h    27 Apr 2005 20:11:21 -0000      1.6
+++ linux-user/arm/syscall.h    14 Aug 2005 14:40:30 -0000
@@ -28,7 +28,9 @@ struct target_pt_regs {
 #define ARM_SYSCALL_BASE       0x900000
 #define ARM_THUMB_SYSCALL      0
 
-#define ARM_NR_cacheflush (ARM_SYSCALL_BASE + 0xf0000 + 2)
+#define ARM_NR_BASE      0xf0000
+#define ARM_NR_cacheflush (ARM_NR_BASE + 2)
+#define ARM_NR_set_tls   (ARM_NR_BASE + 5)
 
 #define ARM_NR_semihosting       0x123456
 #define ARM_NR_thumb_semihosting  0xAB
Index: target-arm/cpu.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/cpu.h,v
retrieving revision 1.10
diff -u -p -r1.10 cpu.h
--- target-arm/cpu.h    17 Apr 2005 19:16:13 -0000      1.10
+++ target-arm/cpu.h    14 Aug 2005 14:40:30 -0000
@@ -32,6 +32,7 @@
 #define EXCP_SWI             2   /* software interrupt */
 #define EXCP_PREFETCH_ABORT  3
 #define EXCP_DATA_ABORT      4
+#define EXCP_KERNEL_TRAP     5   /* Jumped to kernel code page.  */
 
 /* We currently assume float and double are IEEE single and double
    precision respectively.
@@ -55,6 +56,10 @@ typedef struct CPUARMState {
 
     /* coprocessor 15 (MMU) status */
     uint32_t cp15_6;
+#if defined(CONFIG_USER_ONLY) && defined(USE_NPTL)
+    /* TLS register.  */
+    uint32_t tls;
+#endif
     
     /* exception/interrupt handling */
     jmp_buf jmp_env;
@@ -106,6 +111,15 @@ void cpu_arm_close(CPUARMState *s);
 struct siginfo;
 int cpu_arm_signal_handler(int host_signum, struct siginfo *info, 
                            void *puc);
+
+void cpu_lock(void);
+void cpu_unlock(void);
+#if defined(USE_NPTL)
+static inline void cpu_set_tls(CPUARMState *env, void *newtls)
+{
+  env->tls = (uint32_t)newtls;
+}
+#endif
 
 #define TARGET_PAGE_BITS 12
 #include "cpu-all.h"
Index: target-arm/exec.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.5
diff -u -p -r1.5 exec.h
--- target-arm/exec.h   22 Feb 2005 19:27:29 -0000      1.5
+++ target-arm/exec.h   14 Aug 2005 14:40:30 -0000
@@ -57,8 +57,6 @@ int cpu_arm_handle_mmu_fault (CPUState *
 
 /* In op_helper.c */
 
-void cpu_lock(void);
-void cpu_unlock(void);
 void cpu_loop_exit(void);
 
 void raise_exception(int);
Index: target-arm/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/op.c,v
retrieving revision 1.14
diff -u -p -r1.14 op.c
--- target-arm/op.c     13 May 2005 22:45:23 -0000      1.14
+++ target-arm/op.c     14 Aug 2005 14:40:30 -0000
@@ -887,6 +887,12 @@ void OPPROTO op_debug(void)
     cpu_loop_exit();
 }
 
+void OPPROTO op_kernel_trap(void)
+{
+    env->exception_index = EXCP_KERNEL_TRAP;
+    cpu_loop_exit();
+}
+
 /* VFP support.  We follow the convention used for VFP instrunctions:
    Single precition routines have a "s" suffix, double precision a
    "d" suffix.  */
Index: target-arm/translate.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.26
diff -u -p -r1.26 translate.c
--- target-arm/translate.c      13 May 2005 22:50:47 -0000      1.26
+++ target-arm/translate.c      14 Aug 2005 14:40:30 -0000
@@ -2034,6 +2034,7 @@ undef:
     s->is_jmp = DISAS_JUMP;
 }
 
+
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
    basic block 'tb'. If search_pc is TRUE, also generate PC
    information for each intermediate instruction. */
@@ -2063,6 +2064,15 @@ static inline int gen_intermediate_code_
     nb_gen_labels = 0;
     lj = -1;
     do {
+#ifdef CONFIG_USER_ONLY
+        /* Intercept jump to the magic kernel page.  */
+        if (dc->pc > 0xffff0000) {
+            gen_op_kernel_trap();
+            dc->is_jmp = DISAS_UPDATE;
+            break;
+        }
+#endif
+
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
                 if (env->breakpoints[j] == dc->pc) {

reply via email to

[Prev in Thread] Current Thread [Next in Thread]