X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fx86%2Fread_rgba_span_x86.S;h=80144b889c72e5b41c12c6934cfeed2eab5a7295;hb=ab9d1011f5549502a4b960c2067cde69856a2719;hp=960cffa50bc61909dc5871d91cc8cd86b1ec8295;hpb=e3358dea660f5dec53a8be9e38d725f4fd829e14;p=mesa.git

diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S
index 960cffa50bc..80144b889c7 100644
--- a/src/mesa/x86/read_rgba_span_x86.S
+++ b/src/mesa/x86/read_rgba_span_x86.S
@@ -32,22 +32,23 @@
 
 	.file	"read_rgba_span_x86.S"
 #if !defined(__DJGPP__) && !defined(__MINGW32__) /* this one cries for assyntax.h */
-	.section	.rodata
-	.align 16
-	.type	mask, @object
-	.size	mask, 32
-mask:
-	.long	0xff00ff00
-	.long	0xff00ff00
-	.long	0xff00ff00
-	.long	0xff00ff00
-	.long	0x00ff0000
-	.long	0x00ff0000
-	.long	0x00ff0000
-	.long	0x00ff0000
-
-
-/* I implemented these as macros because the appear in quite a few places,
+/* Kevin F. Quinn 2nd July 2006
+ * Replaced data segment constants with text-segment instructions.
+ * LOAD_MASK(mvins,m1,m2): build both masks on the stack, load them into m1/m2 with the move instruction mvins, then restore %esp. */
+#define	LOAD_MASK(mvins,m1,m2) \
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+	mvins	(%esp), m1	/* m1 = 0xff00ff00 repeated; (%esp) now holds 16 bytes of it, mvins decides how many are read */ ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+	mvins	(%esp), m2	/* m2 = 0x00ff0000 repeated, read from the second group of pushes */ ;\
+	addl	$32, %esp	/* drop the eight pushed dwords so the caller's %esp-relative offsets are unchanged */
+
+/* I implemented these as macros because they appear in several places,
  * and I've tweaked them a number of times.  I got tired of changing every
  * place they appear. :)
  */
@@ -84,8 +85,7 @@ _generic_read_RGBA_span_BGRA8888_REV_MMX:
 #ifdef USE_INNER_EMMS
 	emms
 #endif
-	movq	mask, %mm1
-	movq	mask+16, %mm2
+	LOAD_MASK(movq,%mm1,%mm2)
 
 	movl	8(%esp), %ebx	/* source pointer */
 	movl	16(%esp), %edx	/* number of pixels to copy */
@@ -182,8 +182,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE:
 #ifdef USE_INNER_EMMS
 	emms
 #endif
-	movq	mask, %mm1
-	movq	mask+16, %mm2
+
+	LOAD_MASK(movq,%mm1,%mm2)
 
 	movl	16(%esp), %ebx	/* source pointer */
 	movl	24(%esp), %edx	/* number of pixels to copy */
@@ -341,8 +341,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	pushl	%esi
 	pushl	%ebx
 
-	movdqa	mask, %xmm1
-	movdqa	mask+16, %xmm2
+	LOAD_MASK(movdqu,%xmm1,%xmm2)
 
 	movl	12(%esp), %ebx	/* source pointer */
 	movl	20(%esp), %edx	/* number of pixels to copy */
@@ -435,7 +434,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	je	.L47
 
 	movq	(%ebx), %xmm0
-
+	addl	$8, %ebx
+        
 	movdqa	%xmm0, %xmm3
 	movdqa	%xmm0, %xmm4
 	andps	%xmm1, %xmm0
@@ -449,6 +449,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	orps	%xmm3, %xmm0
 
 	movq	%xmm0, (%ecx)
+	addl	$8, %ecx        
 .L47:
 
 	testl	$1, %edx
@@ -464,52 +465,28 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 
 
 
-	.section	.rodata
-
-	.align	16
-mask_565:
-	.word	0xf800
-	.word	0x07e0
-	.word	0x001f
-	.word	0x0000
-
-/* Setting SCALE_ADJUST to 5 gives a perfect match with the classic C
- * implementation in Mesa.  Setting SCALE_ADJUST to 0 is slightly faster but
- * at a small cost to accuracy.
+#define MASK_565_L	0x07e0f800
+#define MASK_565_H	0x0000001f
+/* Setting SCALE_ADJUST to 5 gives a perfect match with the
+ * classic C implementation in Mesa.  Setting SCALE_ADJUST
+ * to 0 is slightly faster but at a small cost to accuracy.
  */
-
 #define SCALE_ADJUST	5
 #if SCALE_ADJUST == 5
-prescale:
-	.word	0x0001
-	.word	0x0010
-	.word	0x0200
-	.word	0x0000
-
-scale:
-	.word	0x20e8		/* (0x00ff0000 / 0x000007c0) + 1 */
-	.word	0x40c5		/* (0x00ff0000 / 0x000003f0) + 1 */
-	.word	0x839d		/* (0x00ff0000 / 0x000001f0) + 1 */
-	.word	0x0000
+#define PRESCALE_L 0x00100001	/* low dword: 16-bit words 1:0 = 0x0010, 0x0001 */
+#define PRESCALE_H 0x00000200	/* high dword: 16-bit words 3:2 = 0x0000, 0x0200 */
+#define SCALE_L 0x40C620E8	/* low dword: words 1:0 = 0x40C6, 0x20e8. NOTE(review): the data table this replaces used 0x40c5 for word 1 -- confirm 0x40C6 is intended */
+#define SCALE_H 0x0000839d	/* high dword: 16-bit words 3:2 = 0x0000, 0x839d */
 #elif SCALE_ADJUST == 0
-prescale:
-	.word	0x0001
-	.word	0x0020
-	.word	0x0800
-	.word	0x0000
-
-scale:
-	.word	0x0108		/* (0x00ff0000 / 0x0000f800) + 1 */
-	.word	0x0104		/* (0x00ff0000 / 0x0000fc00) + 1 */
-	.word	0x0108		/* (0x00ff0000 / 0x0000f800) + 1 */
-	.word	0x0000
+#define PRESCALE_L 0x00200001
+#define PRESCALE_H 0x00000800
+#define SCALE_L 0x01040108
+#define SCALE_H 0x00000108
 #else
 #error SCALE_ADJUST must either be 5 or 0.
 #endif
-
-
-alpha:	.long	0x00000000
-	.long	0x00ff0000
+#define ALPHA_L 0x00000000
+#define ALPHA_H 0x00ff0000
 
 /**
  * MMX optimized version of the RGB565 to RGBA copy routine.
@@ -530,9 +507,19 @@ _generic_read_RGBA_span_RGB565_MMX:
 	movl	8(%esp), %edx	/* destination pointer */
 	movl	12(%esp), %ecx	/* number of pixels to copy */
 
-	movq	mask_565, %mm5
-	movq	prescale, %mm6
-	movq	scale, %mm7
+	pushl	$MASK_565_H
+	pushl	$MASK_565_L
+	movq	(%esp), %mm5
+	pushl	$PRESCALE_H
+	pushl	$PRESCALE_L
+	movq	(%esp), %mm6
+	pushl	$SCALE_H
+	pushl	$SCALE_L
+	movq	(%esp), %mm7
+	pushl	$ALPHA_H
+	pushl	$ALPHA_L
+	movq	(%esp), %mm3
+	addl	$32,%esp
 
 	sarl	$2, %ecx
 	jle	.L01		/* Bail early if the count is negative. */
@@ -581,8 +568,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	/* Always set the alpha value to 0xff.
 	 */
 
-	por	alpha, %mm0
-	por	alpha, %mm2
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
 
 
 	/* Pack the 16-bit values to 8-bit values and store the converted
@@ -593,8 +580,6 @@ _generic_read_RGBA_span_RGB565_MMX:
 	movq	%mm0, (%edx)
 	addl	$8, %edx
 
-
-
 	pshufw	$0xaa, %mm4, %mm0
 	pshufw	$0xff, %mm4, %mm2
 
@@ -609,8 +594,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	pmulhuw	%mm7, %mm0
 	pmulhuw	%mm7, %mm2
 
-	por	alpha, %mm0
-	por	alpha, %mm2
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
 
 	packuswb	%mm2, %mm0
 
@@ -647,8 +632,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	pmulhuw	%mm7, %mm0
 	pmulhuw	%mm7, %mm2
 
-	por	alpha, %mm0
-	por	alpha, %mm2
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
 
 	packuswb	%mm2, %mm0
 
@@ -675,7 +660,7 @@ _generic_read_RGBA_span_RGB565_MMX:
 #endif
 	pmulhuw	%mm7, %mm0
 
-	por	alpha, %mm0
+ 	por %mm3, %mm0
 
 	packuswb	%mm0, %mm0