X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fmesa%2Fx86%2Fmmx_blend.S;h=20ac5a20adfec3ce96b5e99e05f7a68665a0f7c1;hb=f440b0ddd9690a2f8d7b7eed9b56ff77407b9114;hp=259d71c2fa0262b6f67f86d1db11e596dd047fc6;hpb=3fe2bb8933c15a7091838fd982dbad402fe6ad43;p=mesa.git diff --git a/src/mesa/x86/mmx_blend.S b/src/mesa/x86/mmx_blend.S index 259d71c2fa0..20ac5a20adf 100644 --- a/src/mesa/x86/mmx_blend.S +++ b/src/mesa/x86/mmx_blend.S @@ -1,12 +1,11 @@ + ; /* * Written by José Fonseca */ -#include "matypes.h" - -/* FIXME: The pairing rules must be checked as they aren't being fully obeyed. - */ +#ifdef USE_MMX_ASM +#include "matypes.h" /* integer multiplication - alpha plus one * @@ -24,9 +23,9 @@ */ #define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \ PSUBW ( MX1, MA1 ) /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 */ ;\ -TWO(PSUBW ( MX1, MA2 )) /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\ - ;\ PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ + ;\ +TWO(PSUBW ( MX1, MA2 )) /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\ TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ ;\ PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= t1/255 */ ;\ @@ -51,15 +50,15 @@ TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= t2/255 TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ ;\ MOVQ ( MA1, MP1 ) ;\ -TWO(MOVQ ( MA2, MP2 )) ;\ + PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ ;\ - PSRLW ( CONST(8), MP1 ) /* t1 >> 8 */ ;\ -TWO(PSRLW ( CONST(8), MP2 )) /* t2 >> 8 */ ;\ +TWO(MOVQ ( MA2, MP2 )) ;\ +TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ ;\ PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ -TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ - ;\ PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ + ;\ +TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ @@ -76,21 +75,21 @@ TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ #define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \ PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ -TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ - ;\ PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\ + ;\ +TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\ ;\ MOVQ ( MA1, MP1 ) ;\ -TWO(MOVQ ( MA2, MP2 )) ;\ + PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ ;\ - PSRLW ( CONST(8), MP1 ) /* t1 >> 8 */ ;\ -TWO(PSRLW ( CONST(8), MP2 )) /* t2 >> 8 */ ;\ +TWO(MOVQ ( MA2, MP2 )) ;\ +TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ ;\ PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ -TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ - ;\ PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ + ;\ +TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ @@ -98,19 +97,18 @@ TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ #define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \ PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ -TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ - ;\ PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ -TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ - ;\ PMULLW ( MP1, MA1 ) /* t1 = (q1 - p1)*pa1 */ ;\ + ;\ +TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ +TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ TWO(PMULLW ( MP2, MA2 )) /* t2 = (q2 - p2)*pa2 */ ;\ ;\ MOVQ ( MA1, MP1 ) ;\ -TWO(MOVQ ( MA2, MP2 )) ;\ + PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ ;\ - PSRLW ( CONST(8), MP1 ) /* t1 >> 8 */ ;\ -TWO(PSRLW ( CONST(8), MP2 )) /* t2 >> 8 */ ;\ +TWO(MOVQ ( MA2, MP2 )) ;\ +TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ ;\ PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ @@ -130,12 +128,11 @@ TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ #define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \ PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ -TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ - ;\ PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ -TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ - ;\ PMULLW ( MP1, MA1 ) /* t1 = (q1 - p1)*pa1 */ ;\ + ;\ +TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ +TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ TWO(PMULLW ( MP2, MA2 )) /* t2 = (q2 - p2)*pa2 */ ;\ ;\ PSRLW ( CONST(15), MP1 ) /* q1 > p1 ? 1 : 0 */ ;\ @@ -151,10 +148,10 @@ TWO(PSUBW ( MP2, MA2 )) /* t2 -=? 0x100 TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\ ;\ MOVQ ( MA1, MP1 ) ;\ -TWO(MOVQ ( MA2, MP2 )) ;\ + PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ ;\ - PSRLW ( CONST(8), MP1 ) /* t1 >> 8 */ ;\ -TWO(PSRLW ( CONST(8), MP2 )) /* t2 >> 8 */ ;\ +TWO(MOVQ ( MA2, MP2 )) ;\ +TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ ;\ PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ @@ -176,25 +173,24 @@ TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ #define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \ PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ -TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ - ;\ PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ -TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ - ;\ PMULLW ( MP1, MA1 ) /* t1 = (q1 - p1)*pa1 */ ;\ + ;\ +TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ +TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ TWO(PMULLW ( MP2, MA2 )) /* t2 = (q2 - p2)*pa2 */ ;\ ;\ MOVQ ( MA1, MP1 ) ;\ -TWO(MOVQ ( MA2, MP2 )) ;\ + PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ ;\ - PSRLW ( CONST(8), MP1 ) /* t1 >> 8 */ ;\ -TWO(PSRLW ( CONST(8), MP2 )) /* t2 >> 8 */ ;\ +TWO(MOVQ ( MA2, MP2 )) ;\ +TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ ;\ - PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ -TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ + PADDW ( MA1, MP1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ + PSRLW ( CONST(7), MA1 ) /* t1 >> 15 */ ;\ ;\ - PSRLW ( CONST(7), MP1 ) /* t1 >> 15 */ ;\ -TWO(PSRLW ( CONST(7), MP2 )) /* t2 >> 15 */ ;\ +TWO(PADDW ( MA2, MP2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ +TWO(PSRLW ( CONST(7), MA2 )) /* t2 >> 15 */ ;\ ;\ PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\ TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\ @@ -244,7 +240,9 @@ TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa ONE(MOVD ( MSS, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\ TWO(MOVQ ( MSS, REGIND(rgba) )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ - +/* Kevin F. Quinn 2 July 2006 + * Replace data segment constants with text-segment + * constants (via pushl/movq) SEG_DATA ALIGNDATA8 @@ -253,6 +251,11 @@ const_0080: const_80: D_LONG 0x80808080, 0x80808080 +*/ +#define const_0080_l 0x00800080 +#define const_0080_h 0x00800080 +#define const_80_l 0x80808080 +#define const_80_h 0x80808080 SEG_TEXT @@ -260,7 +263,8 @@ const_80: /* Blend transparency function */ -#define TAG(x) x##_transparency +#define TAG(x) CONCAT(x,_transparency) +#define LLTAG(x) LLBL2(x,_transparency) #define INIT \ PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ @@ -281,7 +285,8 @@ const_80: * FIXME: Add some loop unrolling here... */ -#define TAG(x) x##_add +#define TAG(x) CONCAT(x,_add) +#define LLTAG(x) LLBL2(x,_add) #define INIT @@ -301,10 +306,19 @@ TWO(MOVQ ( MM1, REGIND(rgba) )) /* Blend min function */ -#define TAG(x) x##_min +#define TAG(x) CONCAT(x,_min) +#define LLTAG(x) LLBL2(x,_min) +/* Kevin F. Quinn 2nd July 2006 + * Replace data segment constants with text-segment instructions +#define INIT \ + MOVQ ( CONTENT(const_80), MM7 ) + */ #define INIT \ - MOVQ ( CONTENT(const_80), MM7 ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ + PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\ + PUSH_L ( CONST(const_80_l) ) ;\ + MOVQ ( REGIND(ESP), MM7 ) ;\ + ADD_L ( CONST(8), ESP) #define MAIN( rgba, dest ) \ GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ @@ -324,10 +338,19 @@ TWO(MOVQ ( MM1, REGIND(rgba) )) /* Blend max function */ -#define TAG(x) x##_max +#define TAG(x) CONCAT(x,_max) +#define LLTAG(x) LLBL2(x,_max) +/* Kevin F. Quinn 2nd July 2006 + * Replace data segment constants with text-segment instructions #define INIT \ - MOVQ ( CONTENT(const_80), MM7 ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ + MOVQ ( CONTENT(const_80), MM7 ) + */ +#define INIT \ + PUSH_L ( CONST(const_80_l) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\ + PUSH_L ( CONST(const_80_h) ) ;\ + MOVQ ( REGIND(ESP), MM7 ) ;\ + ADD_L ( CONST(8), ESP) #define MAIN( rgba, dest ) \ GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ @@ -347,11 +370,20 @@ TWO(MOVQ ( MM1, REGIND(rgba) )) /* Blend modulate function */ -#define TAG(x) x##_modulate +#define TAG(x) CONCAT(x,_modulate) +#define LLTAG(x) LLBL2(x,_modulate) +/* Kevin F. Quinn 2nd July 2006 + * Replace data segment constants with text-segment instructions +#define INIT \ + MOVQ ( CONTENT(const_0080), MM7 ) + */ #define INIT \ PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\ - MOVQ ( CONTENT(const_0080), MM7 ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ + PUSH_L ( CONST(const_0080_l) ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\ + PUSH_L ( CONST(const_0080_h) ) ;\ + MOVQ ( REGIND(ESP), MM7 ) ;\ + ADD_L ( CONST(8), ESP) #define MAIN( rgba, dest ) \ GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ @@ -362,3 +394,8 @@ TWO(MOVQ ( MM1, REGIND(rgba) )) #include "mmx_blendtmp.h" +#endif + +#if defined (__ELF__) && defined (__linux__) + .section .note.GNU-stack,"",%progbits +#endif