qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] target-tilegx: Implement v*add and v*sub instructions


From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH] target-tilegx: Implement v*add and v*sub instructions
Date: Fri, 18 Sep 2015 19:34:32 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0

On 09/18/2015 05:03 PM, address@hidden wrote:
/*
 * Lane-wise 8-bit addition.
 *
 * Treats a and b as eight packed bytes and returns the eight byte sums,
 * each wrapped modulo 2^8, with no carry propagating between lanes.
 */
uint64_t helper_v1add(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 64; i += 8) {
        /*
         * Work on unsigned lanes: wrapping addition yields the same bits
         * for signed and unsigned operands, and shifting a uint64_t avoids
         * the undefined left shift of a signed value into the sign bit
         * that occurs for the top lane when the operands are int64_t.
         */
        uint64_t ae = (uint8_t)(a >> i);
        uint64_t be = (uint8_t)(b >> i);
        r |= ((ae + be) & 0xff) << i;
    }
    return r;
}
+
/*
 * Lane-wise 16-bit addition.
 *
 * Treats a and b as four packed 16-bit values and returns the four sums,
 * each wrapped modulo 2^16, with no carry propagating between lanes.
 */
uint64_t helper_v2add(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 64; i += 16) {
        /*
         * Unsigned lanes give the same wrapped bits as signed ones and
         * keep the final shift well defined: with int64_t operands the
         * top lane could be shifted into the sign bit, which is undefined.
         */
        uint64_t ae = (uint16_t)(a >> i);
        uint64_t be = (uint16_t)(b >> i);
        r |= ((ae + be) & 0xffff) << i;
    }
    return r;
}

There's a trick for this that's more efficient for 4 or more elements per vector (i.e. good for v2 and v1, but not v4):

   a + b = ((a & 0x7f7f7f7f) + (b & 0x7f7f7f7f)) ^ ((a ^ b) & 0x80808080)

   a - b = ((a | 0x80808080) - (b & 0x7f7f7f7f)) ^ ((a ^ ~b) & 0x80808080)

/*
 * Lane-wise 32-bit addition.
 *
 * Treats a and b as two packed 32-bit values and returns the two sums,
 * each wrapped modulo 2^32, with no carry from the low lane into the
 * high lane.
 */
uint64_t helper_v4add(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 64; i += 32) {
        /*
         * Unsigned lanes produce the same wrapped bits as signed ones and
         * avoid left-shifting a signed 64-bit value into its sign bit,
         * which is undefined for the high lane.
         */
        uint64_t ae = (uint32_t)(a >> i);
        uint64_t be = (uint32_t)(b >> i);
        r |= ((ae + be) & 0xffffffff) << i;
    }
    return r;
}

I should have mentioned this in the previous patch...

I think probably it would be best to open-code all, or most of, the v4 operations. Something like

/*
 * Emit a two-lane 32-bit operation on 64-bit values.
 *
 * a64 and b64 are each split into a pair of 32-bit temporaries,
 * generate() is invoked once per pair of corresponding halves, and the
 * two results are concatenated into d64.
 *
 * generate is called as generate(x, x, y); presumably its first argument
 * is the destination and the other two are the sources, as with
 * tcg_gen_add_i32 -- confirm against the TCG op being passed.
 */
static void gen_v4op(TCGv d64, TCGv a64, TCGv b64,
                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 a_lo, a_hi, b_lo, b_hi;

    a_lo = tcg_temp_new_i32();
    a_hi = tcg_temp_new_i32();
    b_lo = tcg_temp_new_i32();
    b_hi = tcg_temp_new_i32();

    /* Split both 64-bit inputs into their 32-bit halves. */
    tcg_gen_extr_i64_i32(a_lo, a_hi, a64);
    tcg_gen_extr_i64_i32(b_lo, b_hi, b64);

    /* Apply the operation to each half; results overwrite a's halves. */
    generate(a_lo, a_lo, b_lo);
    generate(a_hi, a_hi, b_hi);

    /* Reassemble the two 32-bit results into the 64-bit destination. */
    tcg_gen_concat_i32_i64(d64, a_lo, a_hi);

    tcg_temp_free_i32(a_lo);
    tcg_temp_free_i32(a_hi);
    tcg_temp_free_i32(b_lo);
    tcg_temp_free_i32(b_hi);
}

      case OE_RRR(V4ADD, 0, X0):
      case OE_RRR(V4ADD, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_helper_v4add(tdest, tsrca, tsrcb);

And then

    gen_v4op(tdest, tsrca, tsrcb, tcg_gen_add_i32);


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]