guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 39/437: merge 64-bit cleanliness changes from mzscheme


From: Andy Wingo
Subject: [Guile-commits] 39/437: merge 64-bit cleanliness changes from mzscheme
Date: Mon, 2 Jul 2018 05:13:40 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 76e49b1bcc3c7f06a9d3615e1587dfe12a0bea87
Author: Paolo Bonzini <address@hidden>
Date:   Mon Nov 20 13:53:41 2006 +0000

    merge 64-bit cleanliness changes from mzscheme
    
    2006-11-20  Paolo Bonzini  <address@hidden>
    
        * lightning/i386/asm-i386.h: Merge 64-bit cleanliness changes from mzscheme.
        * lightning/i386/asm-64.h: Likewise.
    
    git-archimport-id: address@hidden/lightning--stable--1.2--patch-41
    git-archimport-id: address@hidden/lightning--stable--1.2--patch-42
---
 AUTHORS                   |   2 +
 ChangeLog                 |   6 +
 lightning/i386/asm-64.h   |   5 +-
 lightning/i386/asm-i386.h | 279 ++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 280 insertions(+), 12 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index bda81b4..cec7e89 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,3 +1,5 @@
 Paolo Bonzini <address@hidden>
 i386 and PPC assemblers by Ian Piumarta <address@hidden>
+x86-64 backend by Matthew Flatt <address@hidden>
 Major PPC contributions by Laurent Michel <address@hidden>
+Major SPARC contributions by Ludovic Courtes <address@hidden>
diff --git a/ChangeLog b/ChangeLog
index d10cc31..2e6dbd4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
 2006-11-20  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/asm-i386.h: Merge 64-bit cleanliness changes from mzscheme.
+       Add SSE.
+       * lightning/i386/asm-64.h: Likewise.
+
+2006-11-20  Paolo Bonzini  <address@hidden>
            Ludovic Courtes  <address@hidden>
 
        * lightning/i386/core-32.h: Disable jit_push and jit_pop if stack not needed.
diff --git a/lightning/i386/asm-64.h b/lightning/i386/asm-64.h
index 5079d0a..9f0f979 100644
--- a/lightning/i386/asm-64.h
+++ b/lightning/i386/asm-64.h
@@ -49,6 +49,10 @@
 #define JIT_CALLTMPSTART 0x48
 #define JIT_REXTMP       0x4B
 
+#define _r_8B( R, D,B    )     (_qMrm(_b10,_rN(R),_r8(B))                      
     ,_jit_I((long)(D)))
+#define _r_8IS( R, D,I,S)      (_qMrm(_b00,_rN(R),_b100 
),_SIB(_SCL(S),_r8(I),_b101 ),_jit_I((long)(D)))
+#define _r_8BIS(R, D,B,I,S)    (_qMrm(_b10,_rN(R),_b100 
),_SIB(_SCL(S),_r8(I),_r8(B)),_jit_I((long)(D)))
+
 #define _qMrm(Md,R,M)  _jit_B((_M(Md)<<6)|(_r((R & 0x7))<<3)|_m((M & 0x7)))
 #define _r_D(  R, D      )     (_Mrm(_b00,_rN(R),_b100 ),_SIB(0,_b100,_b101)   
     ,_jit_I((long)(D)))
 #define _r_Q(  R, D      )     (_qMrm(_b00,_rN(R),_b100 ),_SIB(0,_b100,_b101)  
      ,_jit_I((long)(D)))
@@ -138,4 +142,3 @@
 #endif
 #endif /* __lightning_asm_h */
 
-
diff --git a/lightning/i386/asm-i386.h b/lightning/i386/asm-i386.h
index 6169bb4..c04d2c1 100644
--- a/lightning/i386/asm-i386.h
+++ b/lightning/i386/asm-i386.h
@@ -119,7 +119,7 @@ typedef _uc         jit_insn;
 #define _CKD8(D)        _ck_d(8, ((_uc) _OFF4(D)) )
 
 #define _D8(D)          (_jit_B(0), ((*(_PUC(_jit.x.pc)-1))= _CKD8(D)))
-#define _D32(D)         (_jit_L(0), ((*(_PUL(_jit.x.pc)-1))= _OFF4(D)))
+#define _D32(D)         (_jit_I(0), ((*(_PUI(_jit.x.pc)-1))= _OFF4(D)))
 
 #ifndef _ASM_SAFETY
 # define _M(M)         (M)
@@ -153,9 +153,9 @@ typedef _uc         jit_insn;
 #define _r_0BIS(R,   B,I,S)    (_Mrm(_b00,_rN(R),_b100 
),_SIB(_SCL(S),_r4(I),_r4(B))      )
 #define _r_1B( R, D,B    )     (_Mrm(_b01,_rN(R),_r4(B))                       
     ,_jit_B((long)(D)))
 #define _r_1BIS(R, D,B,I,S)    (_Mrm(_b01,_rN(R),_b100 
),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_B((long)(D)))
-#define _r_4B( R, D,B    )     (_Mrm(_b10,_rN(R),_r4(B))                       
     ,_jit_L((long)(D)))
-#define _r_4IS( R, D,I,S)      (_Mrm(_b00,_rN(R),_b100 
),_SIB(_SCL(S),_r4(I),_b101 ),_jit_L((long)(D)))
-#define _r_4BIS(R, D,B,I,S)    (_Mrm(_b10,_rN(R),_b100 
),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_L((long)(D)))
+#define _r_4B( R, D,B    )     (_Mrm(_b10,_rN(R),_r4(B))                       
     ,_jit_I((long)(D)))
+#define _r_4IS( R, D,I,S)      (_Mrm(_b00,_rN(R),_b100 
),_SIB(_SCL(S),_r4(I),_b101 ),_jit_I((long)(D)))
+#define _r_4BIS(R, D,B,I,S)    (_Mrm(_b10,_rN(R),_b100 
),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_I((long)(D)))
 
 #define _r_DB(  R, D,B    )    ((_s0P(D) && (B != _EBP) ? _r_0B  (R,  B    ) : 
(_s8P(D) ? _r_1B(  R,D,B    ) : _r_4B(  R,D,B    ))))
 #define _r_DBIS(R, D,B,I,S)    ((_s0P(D)                ? _r_0BIS(R,  B,I,S) : 
(_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))
@@ -179,7 +179,7 @@ typedef _uc         jit_insn;
 #define         _OOr(       OP,R                       )  ( _jit_B((OP)>>8), 
_jit_B( (OP)|_r(R))                                 )
 #define          _Os(       OP,B                       )  (    _s8P(B) ? 
_jit_B(((OP)|_b10)) : _jit_B(OP)                        )
 #define            _sW(                             W  )  (                    
               _s8P(W) ? _jit_B(W):_jit_W(W)      )
-#define            _sL(                             L  )  (                    
               _s8P(L) ? _jit_B(L):_jit_L(L)      )
+#define            _sL(                             L  )  (                    
               _s8P(L) ? _jit_B(L):_jit_I(L)      )
 #define          _O_W(      OP                     ,W  )  (        _O      (  
OP  )                          ,_jit_W(W)          )
 #define          _O_D8(     OP                     ,D  )  (        _O      (  
OP  )                         ,_D8(D)      )
 #define          _O_D32(     OP                    ,D  )  (        _O      (  
OP  )                         ,_D32(D)     )
@@ -189,12 +189,12 @@ typedef _uc               jit_insn;
 #define          _O_W_B(    OP                     ,W,B)  (        _O      (  
OP  )                          ,_jit_W(W),_jit_B(B))
 #define          _Or_B(     OP,R                   ,B  )  (        _Or     (  
OP,R)                          ,_jit_B(B)          )
 #define          _Or_W(     OP,R                   ,W  )  (        _Or     (  
OP,R)                          ,_jit_W(W)          )
-#define          _Or_L(     OP,R                   ,L  )  (        _Or     (  
OP,R)                          ,_jit_L(L)          )
+#define          _Or_L(     OP,R                   ,L  )  (        _Or     (  
OP,R)                          ,_jit_I(L)          )
 #define          _O_Mrm(    OP  ,MO,R,M                )  (        _O      (  
OP  ),_Mrm(MO,R,M            )             )
 #define         _OO_Mrm(    OP  ,MO,R,M                )  (       _OO      (  
OP  ),_Mrm(MO,R,M            )             )
 #define          _O_Mrm_B(  OP  ,MO,R,M            ,B  )  (        _O      (  
OP  ),_Mrm(MO,R,M            ) ,_jit_B(B)          )
 #define          _O_Mrm_W(  OP  ,MO,R,M            ,W  )  (        _O      (  
OP  ),_Mrm(MO,R,M            ) ,_jit_W(W)          )
-#define          _O_Mrm_L(  OP  ,MO,R,M            ,L  )  (        _O      (  
OP  ),_Mrm(MO,R,M            ) ,_jit_L(L)          )
+#define          _O_Mrm_L(  OP  ,MO,R,M            ,L  )  (        _O      (  
OP  ),_Mrm(MO,R,M            ) ,_jit_I(L)          )
 #define         _OO_Mrm_B(  OP  ,MO,R,M            ,B  )  (       _OO      (  
OP  ),_Mrm(MO,R,M            ) ,_jit_B(B)          )
 #define          _Os_Mrm_sW(OP  ,MO,R,M            ,W  )  (        _Os     (  
OP,W),_Mrm(MO,R,M            ),_sW(W)      )
 #define          _Os_Mrm_sL(OP  ,MO,R,M            ,L  )  (        _Os     (  
OP,L),_Mrm(MO,R,M            ),_sL(L)      )
@@ -202,7 +202,7 @@ typedef _uc         jit_insn;
 #define         _OO_r_X(    OP     ,R  ,MD,MB,MI,MS    )  (       _OO      (  
OP  ),_r_X(   R  ,MD,MB,MI,MS)             )
 #define          _O_r_X_B(  OP     ,R  ,MD,MB,MI,MS,B  )  (        _O      (  
OP  ),_r_X(   R  ,MD,MB,MI,MS) ,_jit_B(B)          )
 #define          _O_r_X_W(  OP     ,R  ,MD,MB,MI,MS,W  )  (        _O      (  
OP  ),_r_X(   R  ,MD,MB,MI,MS) ,_jit_W(W)          )
-#define          _O_r_X_L(  OP     ,R  ,MD,MB,MI,MS,L  )  (        _O      (  
OP  ),_r_X(   R  ,MD,MB,MI,MS) ,_jit_L(L)          )
+#define          _O_r_X_L(  OP     ,R  ,MD,MB,MI,MS,L  )  (        _O      (  
OP  ),_r_X(   R  ,MD,MB,MI,MS) ,_jit_I(L)          )
 #define         _OO_r_X_B(  OP     ,R  ,MD,MB,MI,MS,B  )  (       _OO      (  
OP  ),_r_X(   R  ,MD,MB,MI,MS) ,_jit_B(B)          )
 #define          _Os_r_X_sW(OP     ,R  ,MD,MB,MI,MS,W  )  (        _Os     (  
OP,W),_r_X(   R  ,MD,MB,MI,MS),_sW(W)      )
 #define          _Os_r_X_sL(OP     ,R  ,MD,MB,MI,MS,L  )  (        _Os     (  
OP,L),_r_X(   R  ,MD,MB,MI,MS),_sL(L)      )
@@ -1033,9 +1033,9 @@ typedef _uc               jit_insn;
 #define FNSTSWr(RD)            ((RD == _AX || RD == _EAX) ? _OO (0xdfe0)       
        \
                                 : JITFAIL ("AX or EAX expected"))
 /* N byte NOPs */
-#define NOPi(N)                (((  (N)    >= 8) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), 
\
-                        (( ((N)&7) == 7) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00)) : \
-                         ( ((N)&7) == 6) ? 
(_jit_B(0x8d),_jit_B(0xb6),_jit_L(0x00)) : \
+#define NOPi(N)                (((  (N)    >= 8) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00),_jit_B(0x90)) : (void) 0), 
\
+                        (( ((N)&7) == 7) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00)) : \
+                         ( ((N)&7) == 6) ? 
(_jit_B(0x8d),_jit_B(0xb6),_jit_I(0x00)) : \
                          ( ((N)&7) == 5) ? 
(_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
 /* leal 0(,%esi), %esi */ ( ((N)&7) == 4) ? 
(_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
 /* leal (,%esi), %esi */  ( ((N)&7) == 3) ? 
(_jit_B(0x8d),_jit_B(0x76),_jit_B(0x00)) : \
@@ -1044,6 +1044,263 @@ typedef _uc             jit_insn;
                          ( ((N)&7) == 0) ? 0 : \
                          JITFAIL(".align argument too large")))
 
+/* --- Media 128-bit instructions ------------------------------------------ */
+
+enum {
+  X86_SSE_CVTIS  = 0x2a,
+  X86_SSE_CVTSI  = 0x2d,
+  X86_SSE_UCOMI  = 0x2e,
+  X86_SSE_COMI   = 0x2f,
+  X86_SSE_SQRT   = 0x51,
+  X86_SSE_RSQRT  = 0x52,
+  X86_SSE_RCP    = 0x53,
+  X86_SSE_AND    = 0x54,
+  X86_SSE_ANDN   = 0x55,
+  X86_SSE_OR     = 0x56,
+  X86_SSE_XOR    = 0x57,
+  X86_SSE_ADD    = 0x58,
+  X86_SSE_MUL    = 0x59,
+  X86_SSE_CVTSD  = 0x5a,
+  X86_SSE_CVTDT  = 0x5b,
+  X86_SSE_SUB    = 0x5c,
+  X86_SSE_MIN    = 0x5d,
+  X86_SSE_DIV    = 0x5e,
+  X86_SSE_MAX    = 0x5f,
+};
+
+/*                                                                     _format 
        Opcd            ,Mod ,r      ,m         ,mem=dsp+sib    ,imm... */
+
+#define __SSELrr(OP,RS,RSA,RD,RDA)     (_REXLrr(RD, RS),               _OO_Mrm 
        (0x0f00|(OP)    ,_b11,RDA(RD),RSA(RS)                           ))
+#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA)        (_REXLmr(MB, MI, RD),           
_OO_r_X         (0x0f00|(OP)         ,RDA(RD)           ,MD,MB,MI,MS            
))
+#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS)        (_REXLrm(RS, MB, MI),           
_OO_r_X         (0x0f00|(OP)         ,RSA(RS)           ,MD,MB,MI,MS            
))
+
+#define __SSEQrr(OP,RS,RSA,RD,RDA)     (_REXQrr(RD, RS),               _OO_Mrm 
        (0x0f00|(OP)    ,_b11,RDA(RD),RSA(RS)                           ))
+#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA)        (_REXQmr(MB, MI, RD),           
_OO_r_X         (0x0f00|(OP)         ,RDA(RD)           ,MD,MB,MI,MS            
))
+#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS)        (_REXQrm(RS, MB, MI),           
_OO_r_X         (0x0f00|(OP)         ,RSA(RS)           ,MD,MB,MI,MS            
))
+
+#define _SSELrr(PX,OP,RS,RSA,RD,RDA)                                   
(_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
+#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA)                              
(_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS)                              
(_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
+
+#define _SSEQrr(PX,OP,RS,RSA,RD,RDA)                                   
(_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
+#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA)                              
(_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS)                              
(_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
+
+#define _SSEPSrr(OP,RS,RD)             __SSELrr(      OP, RS,_rX, RD,_rX)
+#define _SSEPSmr(OP,MD,MB,MI,MS,RD)    __SSELmr(      OP, MD, MB, MI, MS, 
RD,_rX)
+#define _SSEPSrm(OP,RS,MD,MB,MI,MS)    __SSELrm(      OP, RS,_rX, MD, MB, MI, 
MS)
+
+#define _SSEPDrr(OP,RS,RD)              _SSELrr(0x66, OP, RS,_rX, RD,_rX)
+#define _SSEPDmr(OP,MD,MB,MI,MS,RD)     _SSELmr(0x66, OP, MD, MB, MI, MS, 
RD,_rX)
+#define _SSEPDrm(OP,RS,MD,MB,MI,MS)     _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, 
MS)
+
+#define _SSESSrr(OP,RS,RD)              _SSELrr(0xf3, OP, RS,_rX, RD,_rX)
+#define _SSESSmr(OP,MD,MB,MI,MS,RD)     _SSELmr(0xf3, OP, MD, MB, MI, MS, 
RD,_rX)
+#define _SSESSrm(OP,RS,MD,MB,MI,MS)     _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, 
MS)
+
+#define _SSESDrr(OP,RS,RD)              _SSELrr(0xf2, OP, RS,_rX, RD,_rX)
+#define _SSESDmr(OP,MD,MB,MI,MS,RD)     _SSELmr(0xf2, OP, MD, MB, MI, MS, 
RD,_rX)
+#define _SSESDrm(OP,RS,MD,MB,MI,MS)     _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, 
MS)
+
+#define ADDPSrr(RS, RD)                        _SSEPSrr(X86_SSE_ADD, RS, RD)
+#define ADDPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, 
RD)
+#define ADDPDrr(RS, RD)                        _SSEPDrr(X86_SSE_ADD, RS, RD)
+#define ADDPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, 
RD)
+
+#define ADDSSrr(RS, RD)                        _SSESSrr(X86_SSE_ADD, RS, RD)
+#define ADDSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, 
RD)
+#define ADDSDrr(RS, RD)                        _SSESDrr(X86_SSE_ADD, RS, RD)
+#define ADDSDmr(MD, MB, MI, MS, RD)    _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, 
RD)
+
+#define ANDNPSrr(RS, RD)               _SSEPSrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPSmr(MD, MB, MI, MS, RD)   _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, 
RD)
+#define ANDNPDrr(RS, RD)               _SSEPDrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPDmr(MD, MB, MI, MS, RD)   _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, 
RD)
+
+#define ANDPSrr(RS, RD)                        _SSEPSrr(X86_SSE_AND, RS, RD)
+#define ANDPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, 
RD)
+#define ANDPDrr(RS, RD)                        _SSEPDrr(X86_SSE_AND, RS, RD)
+#define ANDPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, 
RD)
+
+#define DIVPSrr(RS, RD)                        _SSEPSrr(X86_SSE_DIV, RS, RD)
+#define DIVPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, 
RD)
+#define DIVPDrr(RS, RD)                        _SSEPDrr(X86_SSE_DIV, RS, RD)
+#define DIVPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, 
RD)
+
+#define DIVSSrr(RS, RD)                        _SSESSrr(X86_SSE_DIV, RS, RD)
+#define DIVSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, 
RD)
+#define DIVSDrr(RS, RD)                        _SSESDrr(X86_SSE_DIV, RS, RD)
+#define DIVSDmr(MD, MB, MI, MS, RD)    _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, 
RD)
+
+#define MAXPSrr(RS, RD)                        _SSEPSrr(X86_SSE_MAX, RS, RD)
+#define MAXPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, 
RD)
+#define MAXPDrr(RS, RD)                        _SSEPDrr(X86_SSE_MAX, RS, RD)
+#define MAXPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, 
RD)
+
+#define MAXSSrr(RS, RD)                        _SSESSrr(X86_SSE_MAX, RS, RD)
+#define MAXSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, 
RD)
+#define MAXSDrr(RS, RD)                        _SSESDrr(X86_SSE_MAX, RS, RD)
+#define MAXSDmr(MD, MB, MI, MS, RD)    _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, 
RD)
+
+#define MINPSrr(RS, RD)                        _SSEPSrr(X86_SSE_MIN, RS, RD)
+#define MINPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, 
RD)
+#define MINPDrr(RS, RD)                        _SSEPDrr(X86_SSE_MIN, RS, RD)
+#define MINPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, 
RD)
+
+#define MINSSrr(RS, RD)                        _SSESSrr(X86_SSE_MIN, RS, RD)
+#define MINSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, 
RD)
+#define MINSDrr(RS, RD)                        _SSESDrr(X86_SSE_MIN, RS, RD)
+#define MINSDmr(MD, MB, MI, MS, RD)    _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, 
RD)
+
+#define MULPSrr(RS, RD)                        _SSEPSrr(X86_SSE_MUL, RS, RD)
+#define MULPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, 
RD)
+#define MULPDrr(RS, RD)                        _SSEPDrr(X86_SSE_MUL, RS, RD)
+#define MULPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, 
RD)
+
+#define MULSSrr(RS, RD)                        _SSESSrr(X86_SSE_MUL, RS, RD)
+#define MULSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, 
RD)
+#define MULSDrr(RS, RD)                        _SSESDrr(X86_SSE_MUL, RS, RD)
+#define MULSDmr(MD, MB, MI, MS, RD)    _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, 
RD)
+
+#define ORPSrr(RS, RD)                 _SSEPSrr(X86_SSE_OR, RS, RD)
+#define ORPSmr(MD, MB, MI, MS, RD)     _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+#define ORPDrr(RS, RD)                 _SSEPDrr(X86_SSE_OR, RS, RD)
+#define ORPDmr(MD, MB, MI, MS, RD)     _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+
+#define RCPPSrr(RS, RD)                        _SSEPSrr(X86_SSE_RCP, RS, RD)
+#define RCPPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, 
RD)
+#define RCPSSrr(RS, RD)                        _SSESSrr(X86_SSE_RCP, RS, RD)
+#define RCPSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, 
RD)
+
+#define RSQRTPSrr(RS, RD)              _SSEPSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTPSmr(MD, MB, MI, MS, RD)  _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, 
RD)
+#define RSQRTSSrr(RS, RD)              _SSESSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTSSmr(MD, MB, MI, MS, RD)  _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, 
RD)
+
+#define SQRTPSrr(RS, RD)               _SSEPSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPSmr(MD, MB, MI, MS, RD)   _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, 
RD)
+#define SQRTPDrr(RS, RD)               _SSEPDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPDmr(MD, MB, MI, MS, RD)   _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, 
RD)
+
+#define SQRTSSrr(RS, RD)               _SSESSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSSmr(MD, MB, MI, MS, RD)   _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, 
RD)
+#define SQRTSDrr(RS, RD)               _SSESDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSDmr(MD, MB, MI, MS, RD)   _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, 
RD)
+
+#define SUBPSrr(RS, RD)                        _SSEPSrr(X86_SSE_SUB, RS, RD)
+#define SUBPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, 
RD)
+#define SUBPDrr(RS, RD)                        _SSEPDrr(X86_SSE_SUB, RS, RD)
+#define SUBPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, 
RD)
+
+#define SUBSSrr(RS, RD)                        _SSESSrr(X86_SSE_SUB, RS, RD)
+#define SUBSSmr(MD, MB, MI, MS, RD)    _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, 
RD)
+#define SUBSDrr(RS, RD)                        _SSESDrr(X86_SSE_SUB, RS, RD)
+#define SUBSDmr(MD, MB, MI, MS, RD)    _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, 
RD)
+
+#define XORPSrr(RS, RD)                        _SSEPSrr(X86_SSE_XOR, RS, RD)
+#define XORPSmr(MD, MB, MI, MS, RD)    _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, 
RD)
+#define XORPDrr(RS, RD)                        _SSEPDrr(X86_SSE_XOR, RS, RD)
+#define XORPDmr(MD, MB, MI, MS, RD)    _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, 
RD)
+
+#define COMISSrr(RS, RD)               _SSESSrr(X86_SSE_COMI, RS, RD)
+#define COMISSmr(MD, MB, MI, MS, RD)   _SSESSmr(X86_SSE_COMI, MD, MB, MI, MS, 
RD)
+#define COMISDrr(RS, RD)               _SSESDrr(X86_SSE_COMI, RS, RD)
+#define COMISDmr(MD, MB, MI, MS, RD)   _SSESDmr(X86_SSE_COMI, MD, MB, MI, MS, 
RD)
+
+#define UCOMISSrr(RS, RD)              _SSESSrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISSmr(MD, MB, MI, MS, RD)  _SSESSmr(X86_SSE_UCOMI, MD, MB, MI, MS, 
RD)
+#define UCOMISDrr(RS, RD)              _SSESDrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISDmr(MD, MB, MI, MS, RD)  _SSESDmr(X86_SSE_UCOMI, MD, MB, MI, MS, 
RD)
+
+#define MOVAPSrr(RS, RD)               _SSEPSrr(0x28, RS, RD)
+#define MOVAPSmr(MD, MB, MI, MS, RD)   _SSEPSmr(0x28, MD, MB, MI, MS, RD)
+#define MOVAPSrm(RS, MD, MB, MI, MS)   _SSEPSrm(0x29, RS, MD, MB, MI, MS)
+
+#define MOVAPDrr(RS, RD)               _SSEPDrr(0x28, RS, RD)
+#define MOVAPDmr(MD, MB, MI, MS, RD)   _SSEPDmr(0x28, MD, MB, MI, MS, RD)
+#define MOVAPDrm(RS, MD, MB, MI, MS)   _SSEPDrm(0x29, RS, MD, MB, MI, MS)
+
+#define CVTPS2PIrr(RS, RD)             __SSELrr(      X86_SSE_CVTSI, RS,_rX, 
RD,_rM)
+#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr(      X86_SSE_CVTSI, MD, MB, 
MI, MS, RD,_rM)
+#define CVTPD2PIrr(RS, RD)              _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, 
RD,_rM)
+#define CVTPD2PImr(MD, MB, MI, MS, RD)  _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, 
MI, MS, RD,_rM)
+
+#define CVTPI2PSrr(RS, RD)             __SSELrr(      X86_SSE_CVTIS, RS,_rM, 
RD,_rX)
+#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr(      X86_SSE_CVTIS, MD, MB, 
MI, MS, RD,_rX)
+#define CVTPI2PDrr(RS, RD)              _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, 
RD,_rX)
+#define CVTPI2PDmr(MD, MB, MI, MS, RD)  _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, 
MI, MS, RD,_rX)
+
+#define CVTPS2PDrr(RS, RD)             __SSELrr(      X86_SSE_CVTSD, RS,_rX, 
RD,_rX)
+#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr(      X86_SSE_CVTSD, MD, MB, 
MI, MS, RD,_rX)
+#define CVTPD2PSrr(RS, RD)              _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, 
RD,_rX)
+#define CVTPD2PSmr(MD, MB, MI, MS, RD)  _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, 
MI, MS, RD,_rX)
+
+#define CVTSS2SDrr(RS, RD)              _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, 
RD,_rX)
+#define CVTSS2SDmr(MD, MB, MI, MS, RD)  _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, 
MI, MS, RD,_rX)
+#define CVTSD2SSrr(RS, RD)              _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, 
RD,_rX)
+#define CVTSD2SSmr(MD, MB, MI, MS, RD)  _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, 
MI, MS, RD,_rX)
+
+#define CVTSS2SILrr(RS, RD)             _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, 
RD,_r4)
+#define CVTSS2SILmr(MD, MB, MI, MS, RD)         _SSELmr(0xf3, X86_SSE_CVTSI, 
MD, MB, MI, MS, RD,_r4)
+#define CVTSD2SILrr(RS, RD)             _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, 
RD,_r4)
+#define CVTSD2SILmr(MD, MB, MI, MS, RD)         _SSELmr(0xf2, X86_SSE_CVTSI, 
MD, MB, MI, MS, RD,_r4)
+
+#define CVTSI2SSLrr(RS, RD)             _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, 
RD,_rX)
+#define CVTSI2SSLmr(MD, MB, MI, MS, RD)         _SSELmr(0xf3, X86_SSE_CVTIS, 
MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDLrr(RS, RD)             _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, 
RD,_rX)
+#define CVTSI2SDLmr(MD, MB, MI, MS, RD)         _SSELmr(0xf2, X86_SSE_CVTIS, 
MD, MB, MI, MS, RD,_rX)
+
+#define CVTSS2SIQrr(RS, RD)             _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, 
RD,_r8)
+#define CVTSS2SIQmr(MD, MB, MI, MS, RD)         _SSEQmr(0xf3, X86_SSE_CVTSI, 
MD, MB, MI, MS, RD,_r8)
+#define CVTSD2SIQrr(RS, RD)             _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, 
RD,_r8)
+#define CVTSD2SIQmr(MD, MB, MI, MS, RD)         _SSEQmr(0xf2, X86_SSE_CVTSI, 
MD, MB, MI, MS, RD,_r8)
+
+#define CVTSI2SSQrr(RS, RD)             _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, 
RD,_rX)
+#define CVTSI2SSQmr(MD, MB, MI, MS, RD)         _SSEQmr(0xf3, X86_SSE_CVTIS, 
MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDQrr(RS, RD)             _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, 
RD,_rX)
+#define CVTSI2SDQmr(MD, MB, MI, MS, RD)         _SSEQmr(0xf2, X86_SSE_CVTIS, 
MD, MB, MI, MS, RD,_rX)
+
+#define MOVDLXrr(RS, RD)                _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
+#define MOVDLXmr(MD, MB, MI, MS, RD)    _SSELmr(0x66, 0x6e, MD, MB, MI, MS, 
RD,_rX)
+#define MOVDQXrr(RS, RD)                _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
+#define MOVDQXmr(MD, MB, MI, MS, RD)    _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, 
RD,_rX)
+
+#define MOVDXLrr(RS, RD)                _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4)
+#define MOVDXLrm(RS, MD, MB, MI, MS)    _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, 
MI, MS)
+#define MOVDXQrr(RS, RD)                _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8)
+#define MOVDXQrm(RS, MD, MB, MI, MS)    _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, 
MI, MS)
+
+#define MOVDLMrr(RS, RD)               __SSELrr(      0x6e, RS,_r4, RD,_rM)
+#define MOVDLMmr(MD, MB, MI, MS, RD)   __SSELmr(      0x6e, MD, MB, MI, MS, 
RD,_rM)
+#define MOVDQMrr(RS, RD)               __SSEQrr(      0x6e, RS,_r8, RD,_rM)
+#define MOVDQMmr(MD, MB, MI, MS, RD)   __SSEQmr(      0x6e, MD, MB, MI, MS, 
RD,_rM)
+
+#define MOVDMLrr(RS, RD)               __SSELrr(      0x7e, RS,_rM, RD,_r4)
+#define MOVDMLrm(RS, MD, MB, MI, MS)   __SSELrm(      0x7e, RS,_rM, MD, MB, 
MI, MS)
+#define MOVDMQrr(RS, RD)               __SSEQrr(      0x7e, RS,_rM, RD,_r8)
+#define MOVDMQrm(RS, MD, MB, MI, MS)   __SSEQrm(      0x7e, RS,_rM, MD, MB, 
MI, MS)
+
+#define MOVDQ2Qrr(RS, RD)               _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM)
+#define MOVHLPSrr(RS, RD)              __SSELrr(      0x12, RS,_rX, RD,_rX)
+#define MOVLHPSrr(RS, RD)              __SSELrr(      0x16, RS,_rX, RD,_rX)
+
+#define MOVDQArr(RS, RD)                _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQAmr(MD, MB, MI, MS, RD)    _SSELmr(0x66, 0x6f, MD, MB, MI, MS, 
RD,_rX)
+#define MOVDQArm(RS, MD, MB, MI, MS)    _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, 
MI, MS)
+
+#define MOVDQUrr(RS, RD)                _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQUmr(MD, MB, MI, MS, RD)    _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, 
RD,_rX)
+#define MOVDQUrm(RS, MD, MB, MI, MS)    _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, 
MI, MS)
+
+#define MOVHPDmr(MD, MB, MI, MS, RD)    _SSELmr(0x66, 0x16, MD, MB, MI, MS, 
RD,_rX)
+#define MOVHPDrm(RS, MD, MB, MI, MS)    _SSELrm(0x66, 0x17, RS,_rX, MD, MB, 
MI, MS)
+#define MOVHPSmr(MD, MB, MI, MS, RD)   __SSELmr(      0x16, MD, MB, MI, MS, 
RD,_rX)
+#define MOVHPSrm(RS, MD, MB, MI, MS)   __SSELrm(      0x17, RS,_rX, MD, MB, 
MI, MS)
+
+#define MOVLPDmr(MD, MB, MI, MS, RD)    _SSELmr(0x66, 0x12, MD, MB, MI, MS, 
RD,_rX)
+#define MOVLPDrm(RS, MD, MB, MI, MS)    _SSELrm(0x66, 0x13, RS,_rX, MD, MB, 
MI, MS)
+#define MOVLPSmr(MD, MB, MI, MS, RD)   __SSELmr(      0x12, MD, MB, MI, MS, 
RD,_rX)
+#define MOVLPSrm(RS, MD, MB, MI, MS)   __SSELrm(      0x13, RS,_rX, MD, MB, 
MI, MS)
 
 /*** References:                                                               
                */
 /*                                                                             
                */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]