avr-gcc-list
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[avr-gcc-list] [patch,avr-gcc] optimizing mov SI/SF


From: Georg-Johann Lay
Subject: [avr-gcc-list] [patch,avr-gcc] optimizing mov SI/SF
Date: Tue, 24 Mar 2009 22:01:41 +0100
User-agent: Thunderbird 2.0.0.21 (Windows/20090302)

Hi,

This patch is a minor optimization for moving a compile-time constant SI/SF value to a register that is not in LD_REGS. The patch tracks the value currently held in the scratch register and thereby saves some moves here and there.

A small test program is attached to see the effect. Compile, e.g. with
  -Os -dp -S
  -Os -dp -S -fno-split-wide-types
and look out for the movsf/reload_insi patterns.

The drawback is that the constant's value is obfuscated in the asm output.

The same could be done for cmpsi. Handling cmpsi would be easier with the adjust_insn_len skeleton in place, so that work can wait until later.

For SI there are some masks like 0xffff0000 and 0x0000ffff in libgcc where the patch will take effect.

Greets,

Georg-Johann

Index: /local/gcc.gnu.org/trunk/gcc/config/avr/avr.c
===================================================================
--- /local/gcc.gnu.org/trunk/gcc/config/avr/avr.c       (Revision 145005)
+++ /local/gcc.gnu.org/trunk/gcc/config/avr/avr.c       (Arbeitskopie)
@@ -2472,6 +2472,19 @@ out_movsi_mr_r (rtx insn, rtx op[], int 
   return "";
 }
 
+/* An internal helper:
+   len == NULL: dispatch to output_asm_insn
+   len != NULL: add inc to *len, no output. Used below adjust_insn_length */
+
+static inline void
+output_asm_insn_len (const char *str, rtx *operands, int *len, int inc)
+{
+  if (len)
+    (*len) += inc;
+  else
+    output_asm_insn (str, operands);
+}
+
 const char *
 output_movsisf(rtx insn, rtx operands[], int *l)
 {
@@ -2589,17 +2602,51 @@ output_movsisf(rtx insn, rtx operands[],
            }
          
          /* Last resort, better than loading from memory.  */
-         *l = 10;
-         return (AS2 (mov,__tmp_reg__,r31) CR_TAB
-                 AS2 (ldi,r31,lo8(%1))     CR_TAB
-                 AS2 (mov,%A0,r31)         CR_TAB
-                 AS2 (ldi,r31,hi8(%1))     CR_TAB
-                 AS2 (mov,%B0,r31)         CR_TAB
-                 AS2 (ldi,r31,hlo8(%1))    CR_TAB
-                 AS2 (mov,%C0,r31)         CR_TAB
-                 AS2 (ldi,r31,hhi8(%1))    CR_TAB
-                 AS2 (mov,%D0,r31)         CR_TAB
-                 AS2 (mov,r31,__tmp_reg__));
+          /* FIXME: Probably worse than defining reload_insf
+                    because in most cases a LD_REGS scratch might
+                    be available, so then there would be no need to
+                    save/restore the scratch by hand. But if no LD_REGS
+                    was available, some stuff will be spilled... */
+
+          {
+            int nbytes = GET_MODE_SIZE (GET_MODE (dest));
+            int dest_regno = true_regnum (dest);
+            int tmp_regno = dest_regno + nbytes-1;
+            rtx tmp_reg;
+            rtx op[3];
+
+            /* Even though dest is not in class "d", some regs of it
+               may be in "d". This happens if dest lives in R14.
+               In that special case we don't need to bother Z+1. */
+
+            if (!TEST_HARD_REG_CLASS (LD_REGS, tmp_regno))
+              tmp_regno = REG_Z+1;
+
+            tmp_reg = gen_rtx_REG (QImode, tmp_regno);
+
+            op[0] = dest;
+            op[1] = operands[1];
+            op[2] = tmp_reg;
+
+            /* save tmp_reg if needed. Adjust len is at restoring */
+            if (!real_l
+                && REG_Z+1 == tmp_regno)
+              {
+                output_asm_insn (AS2 (mov,__tmp_reg__,%2), op);
+              }
+
+            /* output the very move: dispatch to reload_in */
+            output_reload_insisf (insn, op, real_l);
+
+            /* restore tmp_reg if needed */
+            if (REG_Z+1 == tmp_regno)
+              {
+                /* +2 because save from above */
+                output_asm_insn_len (AS2 (mov,%2,__tmp_reg__), op, real_l, 2);
+              }
+
+            return "";
+          }
        }
       else if (GET_CODE (src) == MEM)
        return out_movsi_r_mr (insn, operands, real_l); /* mov r,m */
@@ -5908,50 +5955,81 @@ output_reload_inhi (rtx insn ATTRIBUTE_U
 const char *
 output_reload_insisf (rtx insn ATTRIBUTE_UNUSED, rtx *operands, int *len)
 {
+  rtx op[3];
+  rtx dest = operands[0];
   rtx src = operands[1];
-  int cnst = (GET_CODE (src) == CONST_INT);
-
-  if (len)
-    {
-      if (cnst)
-       *len = 4 + ((INTVAL (src) & 0xff) != 0)
-               + ((INTVAL (src) & 0xff00) != 0)
-               + ((INTVAL (src) & 0xff0000) != 0)
-               + ((INTVAL (src) & 0xff000000) != 0);
-      else
-       *len = 8;
 
-      return "";
-    }
+  unsigned HOST_WIDE_INT ival;
+  HOST_WIDE_INT tmp_val = 0x100;
+  int byte_no;
 
-  if (cnst && ((INTVAL (src) & 0xff) == 0))
-    output_asm_insn (AS2 (mov, %A0, __zero_reg__), operands);
-  else
+  if (CONST_INT == GET_CODE (src))
     {
-      output_asm_insn (AS2 (ldi, %2, lo8(%1)), operands);
-      output_asm_insn (AS2 (mov, %A0, %2), operands);
+      ival = INTVAL (src);
     }
-  if (cnst && ((INTVAL (src) & 0xff00) == 0))
-    output_asm_insn (AS2 (mov, %B0, __zero_reg__), operands);
-  else
+  else if (CONST_DOUBLE == GET_CODE (src))
     {
-      output_asm_insn (AS2 (ldi, %2, hi8(%1)), operands);
-      output_asm_insn (AS2 (mov, %B0, %2), operands);
+      REAL_VALUE_TYPE rv;
+      gcc_assert (SFmode == GET_MODE (src));
+      REAL_VALUE_FROM_CONST_DOUBLE (rv, src);
+      REAL_VALUE_TO_TARGET_SINGLE (rv, ival);
     }
-  if (cnst && ((INTVAL (src) & 0xff0000) == 0))
-    output_asm_insn (AS2 (mov, %C0, __zero_reg__), operands);
   else
     {
-      output_asm_insn (AS2 (ldi, %2, hlo8(%1)), operands);
-      output_asm_insn (AS2 (mov, %C0, %2), operands);
+      gcc_unreachable();
     }
-  if (cnst && ((INTVAL (src) & 0xff000000) == 0))
-    output_asm_insn (AS2 (mov, %D0, __zero_reg__), operands);
-  else
+
+  if (len)
+    *len = 0;
+  
+  /* Print the bytes. Start with the LSB because the scratch reg op[2]
+     may be the MSB of dest if this is called from output_movsisf.
+     Note that some parts of dest may be in LD_REGS even though dest
+     is in NO_LD_REGS.  */
+
+  op[2] = operands[2];
+  
+  for (byte_no = 0; byte_no < GET_MODE_SIZE (SImode); byte_no++)
     {
-      output_asm_insn (AS2 (ldi, %2, hhi8(%1)), operands);
-      output_asm_insn (AS2 (mov, %D0, %2), operands);
+      int val8 = ival & 0xff;
+      int regno = true_regnum (dest) + byte_no;
+
+      op[0] = gen_rtx_REG (QImode, regno);
+
+      if (0 == val8)
+        {
+          /* Value == 0 is fine */
+          output_asm_insn_len (AS1 (clr, %0), op, len, 1);
+        }
+      else
+        {
+          /* Value != 0 must go through an LD_REG */
+          op[1] = GEN_INT (val8);
+
+          if (test_hard_reg_class (LD_REGS, op[0]))
+            {
+              /* Scratch not needed */
+              output_asm_insn_len (AS2 (ldi,%0,%1), op, len, 1);
+            }
+          else
+            {
+              /* Need the scratch */
+              if (val8 != tmp_val)
+                {
+                  /* Value in scratch doesn't fit. Load val8 into it.
+                     Keep track of tmp_val, i.e. value in scratch reg */
+                  tmp_val = val8;
+                  
+                  output_asm_insn_len (AS2 (ldi,%2,%1), op, len, 1);
+                }
+
+              output_asm_insn_len (AS2 (mov,%0,%2), op, len, 1);
+            }
+        }
+
+      ival >>= 8;
     }
+
   return "";
 }
 
/* Declared only; no definition appears in this test file.  Every test
   function below calls it between assigning and returning its value.  */
void foo ();

/* Global flag read by every test function below to decide whether the
   constant is assigned.  */
char x;

/* Test case for a 4-byte integer constant move: if the global X is
   non-zero, I is overwritten with 0xffff00ff; foo is then called and I is
   returned.  The constant has one zero byte and three identical 0xff
   bytes.
   A and B are never read.
   NOTE(review): presumably A and B (two long long) only steer which
   registers I arrives in -- confirm against the AVR calling convention.
   NOTE(review): I is long but the return type is int, so the value is
   converted on return; verify this is intended for the test.  */
int movsi_1 (long long a, long long b, long i)
{
    if (x)
        i = 0xffff00ff;

    /* I is still needed after this call because it is returned below.  */
    foo();

    return i;
}

/* Test case for a 4-byte integer constant move: if the global X is
   non-zero, I is overwritten with 0xffff00ff; foo is then called and I is
   returned.  The constant has one zero byte and three identical 0xff
   bytes.
   A and B are never read.
   NOTE(review): presumably A (long long) and B (long) only steer which
   registers I arrives in -- confirm against the AVR calling convention.
   NOTE(review): I is long but the return type is int, so the value is
   converted on return; verify this is intended for the test.  */
int movsi_2 (long long a, long b, long i)
{
    if (x)
        i = 0xffff00ff;

    /* I is still needed after this call because it is returned below.  */
    foo();

    return i;
}

/* Test case for a float constant move: if the global X is non-zero, I is
   overwritten with 3.0f; foo is then called and I is returned.
   In IEEE-754 single precision 3.0f is 0x40400000, i.e. two zero bytes
   and two identical 0x40 bytes.
   A and B are never read.
   NOTE(review): presumably A and B (two long long) only steer which
   registers I arrives in -- confirm against the AVR calling convention.  */
float movsf_1 (long long a, long long b, float i)
{
    if (x)
        i = 3.0f;

    /* I is still needed after this call because it is returned below.  */
    foo();

    return i;
}

/* Test case for a float constant move: if the global X is non-zero, I is
   overwritten with 3.0f; foo is then called and I is returned.
   In IEEE-754 single precision 3.0f is 0x40400000, i.e. two zero bytes
   and two identical 0x40 bytes.
   A and B are never read.
   NOTE(review): presumably A (long long) and B (long) only steer which
   registers I arrives in -- confirm against the AVR calling convention.  */
float movsf_2 (long long a, long b, float i)
{
    if (x)
        i = 3.0f;

    /* I is still needed after this call because it is returned below.  */
    foo();

    return i;
}

reply via email to

[Prev in Thread] Current Thread [Next in Thread]