tinycc-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Tinycc-devel] Unify C and asm symbols


From: grischka
Subject: Re: [Tinycc-devel] Unify C and asm symbols
Date: Wed, 06 Dec 2017 18:39:30 +0100
User-agent: Thunderbird 2.0.0.23 (Windows/20090812)

Michael Matz wrote:
Bleh, I wanted to ask one thing about the no_asmstack patch, the hunk:

@@ -390,7 +388,7 @@ ST_FUNC int find_elf_sym(Section *s, const char *name)
     while (sym_index != 0) {
         sym = &((ElfW(Sym) *)s->data)[sym_index];
         name1 = (char *) s->link->data + sym->st_name;
-        if (!strcmp(name, name1))
+        if (!strcmp(name, name1) && ELFW(ST_BIND)(sym->st_info) != STB_LOCAL)
             return sym_index;
         sym_index = ((int *)hs->data)[2 + nbuckets + sym_index];
     }

My tests go through also without this hunk, so it's either an optimization (but then it should be tested before the strcmp) or it's a left-over from during development of your patch, or it's for a situation I don't know yet (perhaps one of the multi-file ones?)

f1.c:
      asm("call x1; ret; x1: ...");
f2.c:
      void x1() {}

MAY cause
      "error: 'x1' defined twice"
with multi-files, because f1.c:x1 is created (tentatively) as
STB_GLOBAL first, then (at "x1:"), it is set to STB_LOCAL, but
it is not removed from the hash chain. Therefor (at f2.c:x1(){})
find_sym() "MAY" still find it unless explicitly inhibited.

That is "MAY" because tcc rebuilds the hashtable while it grows,
and that does remove any STB_LOCAL's from the hash chains
automatically and after that everything is as it should be.

You can see the problem more reliably if you change new_symtab():
-    nb_buckets = 1;
+    nb_buckets = 10000;

Conceptually it makes sense of course: STB_LOCAL symbols should be bound via the Sym structure (i.e. within a file), not be used for providing resolution for symbols from other files, but still I'm curious why you needed it.

Well, I agree that it is ugly ;)

There are some options:

1) just to omit "&& ELFW(ST_BIND)(sym->st_info) != STB_LOCAL"

2) to omit that, but update the hash chain when we change the STB_LOCAL/GLOBAL
     attribute

3) instead of (2), call rebuild_hash() at the end of each file.

Where all of 1..3 still have this problem:
f1.c:
    x1() { printf("x1(1)\n"); }
    main() { x1(); main_2(); }
f2.c:
    main_2() { asm("call x1"); }
    static x1() { printf("x1(2)\n"); }

$ tcc f1.c f2.c -run @
will print "x1(2)" twice

This could be fixed by option 4)

4) create asm symbols as "static" first always. This however needs to
    post-process asm symbol again (to convert any still undefined symbols
    to globals),  as well as to patch relocation entries if such global
    already was defined in a previous file.

See attached (additional) patch.  It definitely would add some lines
but then again I wasn't able to produce any problems w.r.t. multi-files
with that.

Comments ? ;)

--- grischka



Ciao,
Michael.


commit f8959bfb6ef5db7de519dc1d141c236663a77a76
Author: grischka <grischka>
Date:   Wed Dec 6 17:22:35 2017 +0100

    tccasm: create labels as static first
    
    This fixes some corner case with multiple files where
    file-1 defines a global symbol and file-2 defines
    a static symbol with the same name.
    
    It does require "post-processing" of the asm labels though
    as well as a function to patch relocation entries.
    
    It also leaves 'zombie' entries of formerly static symbols
    in the table (which is not a problem).
    
    There is also a function to add/remove a symbol from the
    hash chain but it is unused.

diff --git a/tcc.h b/tcc.h
index 38bbda8..4ddb4d8 100644
--- a/tcc.h
+++ b/tcc.h
@@ -1418,6 +1418,8 @@ ST_FUNC int put_elf_str(Section *s, const char *sym);
 ST_FUNC int put_elf_sym(Section *s, addr_t value, unsigned long size, int 
info, int other, int shndx, const char *name);
 ST_FUNC int set_elf_sym(Section *s, addr_t value, unsigned long size, int 
info, int other, int shndx, const char *name);
 ST_FUNC int find_elf_sym(Section *s, const char *name);
+ST_FUNC void rehash_elf_sym(Section *s, int sym_index);
+ST_FUNC void redirect_relocs(TCCState *s1, Section *tab, int old_index, int 
new_index);
 ST_FUNC void put_elf_reloc(Section *symtab, Section *s, unsigned long offset, 
int type, int symbol);
 ST_FUNC void put_elf_reloca(Section *symtab, Section *s, unsigned long offset, 
int type, int symbol, addr_t addend);
 
@@ -1592,6 +1594,7 @@ ST_FUNC Sym* get_asm_sym(int name, Sym *csym);
 ST_FUNC void asm_expr(TCCState *s1, ExprValue *pe);
 ST_FUNC int asm_int_expr(TCCState *s1);
 ST_FUNC int tcc_assemble(TCCState *s1, int do_preprocess);
+ST_FUNC void asm_update_labels(TCCState *s1);
 /* ------------ i386-asm.c ------------ */
 ST_FUNC void gen_expr32(ExprValue *pe);
 #ifdef TCC_TARGET_X86_64
diff --git a/tccasm.c b/tccasm.c
index 4fe3f32..7eac08d 100644
--- a/tccasm.c
+++ b/tccasm.c
@@ -42,13 +42,12 @@ static Sym *asm_label_find(int v)
     return sym;
 }
 
-static Sym *asm_label_push(int v, int t)
+static Sym *asm_label_push(int v)
 {
-    Sym *sym = global_identifier_push(v, t, 0);
     /* We always add VT_EXTERN, for sym definition that's tentative
        (for .set, removed for real defs), for mere references it's correct
        as is.  */
-    sym->type.t |= VT_ASM | VT_EXTERN;
+    Sym *sym = global_identifier_push(v, VT_ASM | VT_EXTERN | VT_STATIC, 0);
     sym->r = VT_CONST | VT_SYM;
     return sym;
 }
@@ -67,7 +66,7 @@ ST_FUNC Sym* get_asm_sym(int name, Sym *csym)
 {
     Sym *sym = asm_label_find(name);
     if (!sym) {
-       sym = asm_label_push(name, 0);
+       sym = asm_label_push(name);
        if (csym)
          sym->c = csym->c;
     }
@@ -102,7 +101,7 @@ static void asm_expr_unary(TCCState *s1, ExprValue *pe)
                 /* forward */
                 if (!sym || (sym->c && elfsym(sym)->st_shndx != SHN_UNDEF)) {
                     /* if the last label is defined, then define a new one */
-                   sym = asm_label_push(label, VT_STATIC);
+                   sym = asm_label_push(label);
                 }
             }
            pe->v = 0;
@@ -381,7 +380,7 @@ static Sym* asm_new_label1(TCCState *s1, int label, int 
is_local,
         }
     } else {
     new_label:
-        sym = asm_label_push(label, is_local == 1 ? VT_STATIC : 0);
+        sym = asm_label_push(label);
     }
     if (!sym->c)
       put_extern_sym2(sym, NULL, 0, 0, 0);
@@ -392,11 +391,6 @@ static Sym* asm_new_label1(TCCState *s1, int label, int 
is_local,
     if (is_local != 2)
         sym->type.t &= ~VT_EXTERN;
 
-    if (IS_ASM_SYM(sym) && !(sym->type.t & VT_ASM_GLOBAL)) {
-        sym->type.t |= VT_STATIC;
-        update_storage(sym);
-    }
-
     return sym;
 }
 
@@ -405,6 +399,22 @@ static Sym* asm_new_label(TCCState *s1, int label, int 
is_local)
     return asm_new_label1(s1, label, is_local, cur_text_section->sh_num, ind);
 }
 
+/* At EOF, make any undefined asm symbols global */
+ST_FUNC void asm_update_labels(TCCState *s1)
+{
+    Sym *sym;
+
+    for (sym = global_stack; sym != NULL; sym = sym->prev) {
+        if (IS_ASM_SYM(sym) && (sym->type.t & VT_STATIC)) {
+            ElfSym *esym = elfsym(sym);
+            if (esym && esym->st_shndx == SHN_UNDEF) {
+                sym->type.t &= ~VT_STATIC;
+                update_storage(sym);
+            }
+        }
+    }
+}
+
 /* Set the value of LABEL to that of some expression (possibly
    involving other symbols).  LABEL can be overwritten later still.  */
 static Sym* set_symbol(TCCState *s1, int label)
@@ -979,6 +989,7 @@ ST_FUNC int tcc_assemble(TCCState *s1, int do_preprocess)
     nocode_wanted = 0;
     ret = tcc_assemble_internal(s1, do_preprocess, 1);
     cur_text_section->data_offset = ind;
+    asm_update_labels(s1);
     tcc_debug_end(s1);
     return ret;
 }
diff --git a/tccelf.c b/tccelf.c
index 657aa61..1a7ec18 100644
--- a/tccelf.c
+++ b/tccelf.c
@@ -370,6 +370,47 @@ ST_FUNC int put_elf_sym(Section *s, addr_t value, unsigned 
long size,
     return sym_index;
 }
 
+/* add or remove symbol from hash chain depending on its STB_LOCAL attribute */
+ST_FUNC void rehash_elf_sym(Section *s, int sym_index)
+{
+    Section *hs = s->hash;
+    int *base = (int *)hs->data;
+    int nbuckets = base[0];
+    ElfW(Sym) *sym = (ElfW(Sym) *)s->data + sym_index;
+    int sym_bind = ELFW(ST_BIND)(sym->st_info);
+    char *name = strtab_section->data + sym->st_name;
+    int h = elf_hash((unsigned char *) name) % nbuckets;
+    int *p;
+
+    for (p = &base[2 + h]; *p && *p != sym_index; )
+        p = &base[2 + nbuckets + *p];
+
+    if ((sym_bind == STB_LOCAL) != (0 == *p)) {
+        if (sym_bind == STB_LOCAL) {
+            //printf("make local %s\n", name);
+            *p = base[2 + nbuckets + sym_index];
+        } else {
+            //printf("make global %s\n", name);
+            *p = sym_index;
+        }
+        base[2 + nbuckets + sym_index] = 0;
+    }
+}
+
+ST_FUNC void redirect_relocs(TCCState *s1, Section *tab, int old_index, int 
new_index)
+{
+    int j;
+    for(j = 1; j < s1->nb_sections; j++) {
+        Section *sr = s1->sections[j];
+        if (sr->sh_type == SHT_RELX && sr->link == tab) {
+            ElfW_Rel *rel_end = (ElfW_Rel*)(sr->data + sr->data_offset), *rel;
+            for (rel = (ElfW_Rel*)sr->data; rel < rel_end; ++rel)
+                if (ELFW(R_SYM)(rel->r_info) == old_index)
+                    rel->r_info = ELFW(R_INFO)(new_index, 
ELFW(R_TYPE)(rel->r_info));
+        }
+    }
+}
+
 /* find global ELF symbol 'name' and return its index. Return 0 if not
    found. */
 ST_FUNC int find_elf_sym(Section *s, const char *name)
@@ -388,7 +429,7 @@ ST_FUNC int find_elf_sym(Section *s, const char *name)
     while (sym_index != 0) {
         sym = &((ElfW(Sym) *)s->data)[sym_index];
         name1 = (char *) s->link->data + sym->st_name;
-        if (!strcmp(name, name1) && ELFW(ST_BIND)(sym->st_info) != STB_LOCAL)
+        if (!strcmp(name, name1))
             return sym_index;
         sym_index = ((int *)hs->data)[2 + nbuckets + sym_index];
     }
diff --git a/tccgen.c b/tccgen.c
index 4159b9e..6e02998 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -274,6 +274,9 @@ ST_FUNC int tccgen_compile(TCCState *s1)
     next();
     decl(VT_CONST);
     gen_inline_functions(s1);
+#ifdef CONFIG_TCC_ASM
+    asm_update_labels(s1);
+#endif
     check_vstack();
     /* end of translation unit info */
     tcc_debug_end(s1);
@@ -311,9 +314,32 @@ ST_FUNC void update_storage(Sym *sym)
         sym_bind = STB_WEAK;
     else
         sym_bind = STB_GLOBAL;
+
     old_sym_bind = ELFW(ST_BIND)(esym->st_info);
     if (sym_bind != old_sym_bind) {
-        esym->st_info = ELFW(ST_INFO)(sym_bind, ELFW(ST_TYPE)(esym->st_info));
+        int info = ELFW(ST_INFO)(sym_bind, ELFW(ST_TYPE)(esym->st_info));
+        if (old_sym_bind == STB_LOCAL) {
+            char *name = tcc_strdup(strtab_section->data + esym->st_name);
+            int new_index = set_elf_sym(symtab_section,
+                esym->st_value,
+                esym->st_size,
+                info,
+                esym->st_other,
+                esym->st_shndx,
+                name
+                );
+            tcc_free(name);
+            if (new_index != sym->c) {
+                esym = &((ElfSym *)symtab_section->data)[sym->c];
+                esym->st_shndx = SHN_ABS;
+                redirect_relocs(tcc_state, symtab_section, sym->c, new_index);
+                //printf("rebind %c %2d -> %2d %s\n", sym_bind?'g':'l', 
sym->c, new_index, get_tok_str(sym->v, NULL));
+                sym->c = new_index;
+            }
+        } else {
+            esym->st_info = info;
+            //printf("update %c %s\n", sym_bind?'g':'l', get_tok_str(sym->v, 
NULL));
+        }
     }
 
 #ifdef TCC_TARGET_PE
@@ -863,9 +889,11 @@ static void patch_type(Sym *sym, CType *type)
     }
 
     if (IS_ASM_SYM(sym)) {
-        sym->type = *type;
+        sym->type.t = (sym->type.t & VT_STATIC) | (type->t & ~VT_STATIC);
+        sym->type.ref = type->ref;
+    }
 
-    } else if (!is_compatible_types(&sym->type, type)) {
+    if (!is_compatible_types(&sym->type, type)) {
         tcc_error("incompatible types for redefinition of '%s'",
                   get_tok_str(sym->v, NULL));
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]