qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [patch] Arm constant pool generation


From: Paul Brook
Subject: [Qemu-devel] [patch] Arm constant pool generation
Date: Sat, 23 Apr 2005 16:56:51 +0100
User-agent: KMail/1.7.2

The current Arm dyngen constant pool code only handles normal ldr addressing 
modes. I couldn't convince myself that the existing code actually worked, so 
I rewrote it. The new code supports different addressing modes (with 
different limits), as used by coprocessor load instructions and the armv5te 
load-double instructions. Only normal ldr and FPA loads are currently 
implemented.

The new implementation is slightly different to the old code. Each op is split 
into a code fragment and a constant pool. When a constant load is seen, we 
calculate the amount of "spare" offset available, i.e. how much we can move the 
constant pool by. The key thing is that, unlike the old code, we don't reorder 
the constant pool entries. Doing so could produce out-of-range loads.

Before generating code for each op we check that the code is small enough to 
fit in between the previous code and the pending constant pool. If it isn't, 
we output the constant pool there.

Tested by shortening the permissible branch range to force pool insertion.

Paul
Index: dyngen.c
===================================================================
RCS file: /cvsroot/qemu/qemu/dyngen.c,v
retrieving revision 1.38
diff -u -p -r1.38 dyngen.c
--- dyngen.c    21 Feb 2005 19:53:34 -0000      1.38
+++ dyngen.c    11 Mar 2005 00:53:05 -0000
@@ -1211,90 +1211,137 @@ int arm_emit_ldr_info(const char *name, 
 {
     uint8_t *p;
     uint32_t insn;
-    int offset, min_offset, pc_offset, data_size;
+    int offset, min_offset, pc_offset, data_size, spare, max_pool;
     uint8_t data_allocated[1024];
     unsigned int data_index;
+    int type;
     
     memset(data_allocated, 0, sizeof(data_allocated));
     
     p = p_start;
     min_offset = p_end - p_start;
+    spare = 0x7fffffff;
     while (p < p_start + min_offset) {
         insn = get32((uint32_t *)p);
+        /* TODO: Armv5e ldrd.  */
+        /* TODO: VFP load.  */
         if ((insn & 0x0d5f0000) == 0x051f0000) {
             /* ldr reg, [pc, #im] */
             offset = insn & 0xfff;
             if (!(insn & 0x00800000))
-                        offset = -offset;
+                offset = -offset;
+            max_pool = 4096;
+            type = 0;
+        } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) {
+            /* FPA ldf.  */
+            offset = (insn & 0xff) << 2;
+            if (!(insn & 0x00800000))
+                offset = -offset;
+            max_pool = 1024;
+            type = 1;
+        } else if ((insn & 0x0fff0000) == 0x028f0000) {
+            /* Some gcc load a doubleword immediate with
+               add regN, pc, #imm
+               ldmia regN, {regN, regM}
+               Hope and pray the compiler never generates something like
+               add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */
+            int r;
+
+            r = (insn & 0xf00) >> 7;
+            offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r));
+            max_pool = 1024;
+            type = 2;
+        } else {
+            max_pool = 0;
+            type = -1;
+        }
+        if (type >= 0) {
+            /* PC-relative load needs fixing up.  */
+            if (spare > max_pool - offset)
+                spare = max_pool - offset;
             if ((offset & 3) !=0)
-                error("%s:%04x: ldr pc offset must be 32 bit aligned", 
+                error("%s:%04x: pc offset must be 32 bit aligned", 
+                      name, start_offset + p - p_start);
+            if (offset < 0)
+                error("%s:%04x: Embedded literal value",
                       name, start_offset + p - p_start);
             pc_offset = p - p_start + offset + 8;
             if (pc_offset <= (p - p_start) || 
                 pc_offset >= (p_end - p_start))
-                error("%s:%04x: ldr pc offset must point inside the function code", 
+                error("%s:%04x: pc offset must point inside the function code", 
                       name, start_offset + p - p_start);
             if (pc_offset < min_offset)
                 min_offset = pc_offset;
             if (outfile) {
-                /* ldr position */
+                /* The instruction position */
                 fprintf(outfile, "    arm_ldr_ptr->ptr = gen_code_ptr + %d;\n", 
                         p - p_start);
-                /* ldr data index */
-                data_index = ((p_end - p_start) - pc_offset - 4) >> 2;
-                fprintf(outfile, "    arm_ldr_ptr->data_ptr = arm_data_ptr + %d;\n", 
+                /* The position of the constant pool data.  */
+                data_index = ((p_end - p_start) - pc_offset) >> 2;
+                fprintf(outfile, "    arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n", 
                         data_index);
+                fprintf(outfile, "    arm_ldr_ptr->type = %d;\n", type);
                 fprintf(outfile, "    arm_ldr_ptr++;\n");
-                if (data_index >= sizeof(data_allocated))
-                    error("%s: too many data", name);
-                if (!data_allocated[data_index]) {
-                    ELF_RELOC *rel;
-                    int i, addend, type;
-                    const char *sym_name, *p;
-                    char relname[1024];
-
-                    data_allocated[data_index] = 1;
-
-                    /* data value */
-                    addend = get32((uint32_t *)(p_start + pc_offset));
-                    relname[0] = '\0';
-                    for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                        if (rel->r_offset == (pc_offset + start_offset)) {
-                            sym_name = get_rel_sym_name(rel);
-                            /* the compiler leave some unnecessary references to the code */
-                            get_reloc_expr(relname, sizeof(relname), sym_name);
-                            type = ELF32_R_TYPE(rel->r_info);
-                            if (type != R_ARM_ABS32)
-                                error("%s: unsupported data relocation", name);
-                            break;
-                        }
-                    }
-                    fprintf(outfile, "    arm_data_ptr[%d] = 0x%x",
-                            data_index, addend);
-                    if (relname[0] != '\0')
-                        fprintf(outfile, " + %s", relname);
-                    fprintf(outfile, ";\n");
-                }
             }
         }
         p += 4;
     }
+
+    /* Copy and relocate the constant pool data.  */
     data_size = (p_end - p_start) - min_offset;
     if (data_size > 0 && outfile) {
-        fprintf(outfile, "    arm_data_ptr += %d;\n", data_size >> 2);
+        spare += min_offset;
+        fprintf(outfile, "    arm_data_ptr -= %d;\n", data_size >> 2);
+        fprintf(outfile, "    arm_pool_ptr -= %d;\n", data_size);
+        fprintf(outfile, "    if (arm_pool_ptr > gen_code_ptr + %d)\n"
+                         "        arm_pool_ptr = gen_code_ptr + %d;\n",
+                         spare, spare);
+
+        data_index = 0;
+        for (pc_offset = min_offset;
+             pc_offset < p_end - p_start;
+             pc_offset += 4) {
+
+            ELF_RELOC *rel;
+            int i, addend, type;
+            const char *sym_name;
+            char relname[1024];
+
+            /* data value */
+            addend = get32((uint32_t *)(p_start + pc_offset));
+            relname[0] = '\0';
+            for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                if (rel->r_offset == (pc_offset + start_offset)) {
+                    sym_name = get_rel_sym_name(rel);
+                    /* the compiler leave some unnecessary references to the code */
+                    get_reloc_expr(relname, sizeof(relname), sym_name);
+                    type = ELF32_R_TYPE(rel->r_info);
+                    if (type != R_ARM_ABS32)
+                        error("%s: unsupported data relocation", name);
+                    break;
+                }
+            }
+            fprintf(outfile, "    arm_data_ptr[%d] = 0x%x",
+                    data_index, addend);
+            if (relname[0] != '\0')
+                fprintf(outfile, " + %s", relname);
+            fprintf(outfile, ";\n");
+
+            data_index++;
+        }
     }
 
-    /* the last instruction must be a mov pc, lr */
+    /* the last instruction must be ldmfd fp, {..., pc}  */
     if (p == p_start)
         goto arm_ret_error;
     p -= 4;
     insn = get32((uint32_t *)p);
-    if ((insn & 0xffff0000) != 0xe91b0000) {
+    if ((insn & 0xffff8000) != 0xe91b8000) {
     arm_ret_error:
         if (!outfile)
             printf("%s: invalid epilog\n", name);
     }
-    return p - p_start;            
+    return p - p_start;
 }
 #endif
 
@@ -2221,6 +2268,28 @@ int gen_file(FILE *outfile, int out_type
         
     } else {
         /* generate big code generation switch */
+
+#ifdef HOST_ARM
+        /* We need to know the size of all the ops so we can figure out when
+           to emit constant pools.  This must be consistent with opc.h.  */
+fprintf(outfile,
+"static const uint32_t arm_opc_size[] = {\n"
+"  0,\n" /* end */
+"  0,\n" /* nop */
+"  0,\n" /* nop1 */
+"  0,\n" /* nop2 */
+"  0,\n"); /* nop3 */
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            const char *name;
+            name = get_sym_name(sym);
+            if (strstart(name, OP_PREFIX, NULL)) {
+                fprintf(outfile, "  %d,\n", sym->st_size);
+            }
+       }
+fprintf(outfile,
+"};\n");
+#endif
+
 fprintf(outfile,
 "int dyngen_code(uint8_t *gen_code_buf,\n"
 "                uint16_t *label_offsets, uint16_t *jmp_offsets,\n"
@@ -2231,10 +2300,36 @@ fprintf(outfile,
 "    const uint32_t *opparam_ptr;\n");
 
 #ifdef HOST_ARM
+/* Arm is tricky because it uses constant pools for loading immediate values.
+   We assume (and require) each function is code followed by a constant pool.
+   All the ops are small so this should be ok.  For each op we figure
+   out how much "spare" range we have in the load instructions.  This allows
+   us to insert subsequent ops in between the op and the constant pool,
+   eliminating the need to jump around the pool.
+
+   We currently generate:
+   
+   [ For this example we assume merging would move op1_pool out of range.
+     In practice we should be able to combine many ops before the offset
+     limits are reached. ]
+   op1_code;
+   op2_code;
+   goto op3;
+   op2_pool;
+   op1_pool;
+op3:
+   op3_code;
+   ret;
+   op3_pool;
+
+   Ideally we'd put op1_pool before op2_pool, but that requires two passes.
+ */
 fprintf(outfile,
 "    uint8_t *last_gen_code_ptr = gen_code_buf;\n"
 "    LDREntry *arm_ldr_ptr = arm_ldr_table;\n"
-"    uint32_t *arm_data_ptr = arm_data_table;\n");
+"    uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+/* Initialise the permissible pool offset to an arbitrary large value.  */
+"    uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n");
 #endif
 
 fprintf(outfile,
@@ -2246,9 +2341,23 @@ fprintf(outfile,
        /* Generate prologue, if needed. */ 
 
 fprintf(outfile,
-"    for(;;) {\n"
-"        switch(*opc_ptr++) {\n"
-);
+"    for(;;) {\n");
+
+#ifdef HOST_ARM
+/* Generate constant pool if needed */
+fprintf(outfile,
+"            if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n"
+"                gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n"
+"                last_gen_code_ptr = gen_code_ptr;\n"
+"                arm_ldr_ptr = arm_ldr_table;\n"
+"                arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+"                arm_pool_ptr = gen_code_ptr + 0x1000000;\n"
+"            }\n");
+#endif
+
+fprintf(outfile,
+"        switch(*opc_ptr++) {\n");
 
         for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
             const char *name;
@@ -2282,17 +2391,6 @@ fprintf(outfile,
 "            goto the_end;\n"
 "        }\n");
 
-#ifdef HOST_ARM
-/* generate constant table if needed */
-fprintf(outfile,
-"        if ((gen_code_ptr - last_gen_code_ptr) >= (MAX_FRAG_SIZE - MAX_OP_SIZE)) {\n"
-"            gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 1);\n"
-"            last_gen_code_ptr = gen_code_ptr;\n"
-"            arm_ldr_ptr = arm_ldr_table;\n"
-"            arm_data_ptr = arm_data_table;\n"
-"        }\n");         
-#endif
-
 
 fprintf(outfile,
 "    }\n"
@@ -2301,7 +2399,10 @@ fprintf(outfile,
 
 /* generate some code patching */ 
 #ifdef HOST_ARM
-fprintf(outfile, "gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 0);\n");
+fprintf(outfile,
+"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n"
+"    gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n");
 #endif
     /* flush instruction cache */
     fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n");
Index: dyngen.h
===================================================================
RCS file: /cvsroot/qemu/qemu/dyngen.h,v
retrieving revision 1.7
diff -u -p -r1.7 dyngen.h
--- dyngen.h    3 Jan 2005 23:40:55 -0000       1.7
+++ dyngen.h    11 Mar 2005 00:53:05 -0000
@@ -134,18 +134,16 @@ void fix_bsr(void *p, int offset) {
 
 #ifdef __arm__
 
-#define MAX_OP_SIZE    (128 * 4) /* in bytes */
-/* max size of the code that can be generated without calling arm_flush_ldr */
-#define MAX_FRAG_SIZE  (1024 * 4) 
-//#define MAX_FRAG_SIZE  (135 * 4) /* for testing */ 
+#define ARM_LDR_TABLE_SIZE 1024
 
 typedef struct LDREntry {
     uint8_t *ptr;
     uint32_t *data_ptr;
+    unsigned type:2;
 } LDREntry;
 
 static LDREntry arm_ldr_table[1024];
-static uint32_t arm_data_table[1024];
+static uint32_t arm_data_table[ARM_LDR_TABLE_SIZE];
 
 extern char exec_loop;
 
@@ -164,8 +162,9 @@ static uint8_t *arm_flush_ldr(uint8_t *g
     int offset, data_size, target;
     uint8_t *data_ptr;
     uint32_t insn;
+    uint32_t mask;
  
-    data_size = (uint8_t *)data_end - (uint8_t *)data_start;
+    data_size = (data_end - data_start) << 2;
 
     if (gen_jmp) {
         /* generate branch to skip the data */
@@ -187,17 +186,48 @@ static uint8_t *arm_flush_ldr(uint8_t *g
         offset = ((unsigned long)(le->data_ptr) - (unsigned long)data_start) + 
             (unsigned long)data_ptr - 
             (unsigned long)ptr - 8;
-        insn = *ptr & ~(0xfff | 0x00800000);
         if (offset < 0) {
-            offset = - offset;
-        } else {
-            insn |= 0x00800000;
-        }
-        if (offset > 0xfff) {
-            fprintf(stderr, "Error ldr offset\n");
+            fprintf(stderr, "Negative constant pool offset\n");
             abort();
         }
-        insn |= offset;
+        switch (le->type) {
+          case 0: /* ldr */
+            mask = ~0x00800fff;
+            if (offset >= 4096) {
+                fprintf(stderr, "Bad ldr offset\n");
+                abort();
+            }
+            break;
+          case 1: /* ldc */
+            mask = ~0x008000ff;
+            if (offset >= 1024 ) {
+                fprintf(stderr, "Bad ldc offset\n");
+                abort();
+            }
+            break;
+          case 2: /* add */
+            mask = ~0xfff;
+            if (offset >= 1024 ) {
+                fprintf(stderr, "Bad add offset\n");
+                abort();
+            }
+            break;
+          default:
+            fprintf(stderr, "Bad pc relative fixup\n");
+            abort();
+          }
+        insn = *ptr & mask;
+        switch (le->type) {
+          case 0: /* ldr */
+            insn |= offset | 0x00800000;
+            break;
+          case 1: /* ldc */
+            insn |= (offset >> 2) | 0x00800000;
+            break;
+          case 2: /* add */
+            insn |= (offset >> 2) | 0xf00;
+            break;
+          }
         *ptr = insn;
     }
     return gen_code_ptr;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]