X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fx86%2Fread_rgba_span_x86.S;h=817729973713d41febd41f57844bc250c74146ba;hb=15da8d076110c39d3ce34ac45edf0f3c9ab13b7f;hp=0bade675a58fb98dbd8d9255d10a6f1df4005bbd;hpb=ea3885812704645944752887d892c38a46710956;p=mesa.git

diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S
index 0bade675a58..81772997371 100644
--- a/src/mesa/x86/read_rgba_span_x86.S
+++ b/src/mesa/x86/read_rgba_span_x86.S
@@ -31,23 +31,24 @@
  */
 
 	.file	"read_rgba_span_x86.S"
-#if !defined(__DJGPP__) && !defined(__MINGW32__) /* this one cries for assyntax.h */
-	.section	.rodata
-	.align 16
-	.type	mask, @object
-	.size	mask, 32
-mask:
-	.long	0xff00ff00
-	.long	0xff00ff00
-	.long	0xff00ff00
-	.long	0xff00ff00
-	.long	0x00ff0000
-	.long	0x00ff0000
-	.long	0x00ff0000
-	.long	0x00ff0000
-
-
-/* I implemented these as macros because the appear in quite a few places,
+#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
+/* Kevin F. Quinn 2nd July 2006
+ * Replaced data segment constants with text-segment instructions.
+ * LOAD_MASK: m1 <- 0xff00ff00 dwords, m2 <- 0x00ff0000 dwords, via 32 bytes of stack. */
+#define	LOAD_MASK(mvins,m1,m2) \
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+	mvins	(%esp), m1	;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+	mvins	(%esp), m2	;\
+	addl	$32, %esp
+
+/* I implemented these as macros because they appear in several places,
  * and I've tweaked them a number of times.  I got tired of changing every
  * place they appear. :)
  */
@@ -76,6 +77,7 @@ mask:
  */
 
 .globl _generic_read_RGBA_span_BGRA8888_REV_MMX
+.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
 	.type	_generic_read_RGBA_span_BGRA8888_REV_MMX, @function
 _generic_read_RGBA_span_BGRA8888_REV_MMX:
 	pushl	%ebx
@@ -83,15 +85,14 @@ _generic_read_RGBA_span_BGRA8888_REV_MMX:
 #ifdef USE_INNER_EMMS
 	emms
 #endif
-	movq	mask, %mm1
-	movq	mask+16, %mm2
+	LOAD_MASK(movq,%mm1,%mm2)
 
 	movl	8(%esp), %ebx	/* source pointer */
 	movl	16(%esp), %edx	/* number of pixels to copy */
 	movl	12(%esp), %ecx	/* destination pointer */
 
 	testl	%edx, %edx
-	je	.L20		/* Bail if there's nothing to do. */
+	jle	.L20		/* Bail if there's nothing to do. */
 
 	movl	%ebx, %eax
 
@@ -171,6 +172,7 @@ _generic_read_RGBA_span_BGRA8888_REV_MMX:
  */
 
 .globl _generic_read_RGBA_span_BGRA8888_REV_SSE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
 	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE, @function
 _generic_read_RGBA_span_BGRA8888_REV_SSE:
 	pushl	%esi
@@ -180,13 +182,16 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE:
 #ifdef USE_INNER_EMMS
 	emms
 #endif
-	movq	mask, %mm1
-	movq	mask+16, %mm2
+
+	LOAD_MASK(movq,%mm1,%mm2)
 
 	movl	16(%esp), %ebx	/* source pointer */
 	movl	24(%esp), %edx	/* number of pixels to copy */
 	movl	20(%esp), %ecx	/* destination pointer */
 
+	testl	%edx, %edx
+	jle	.L35		/* Bail if there's nothing to do. */
+
 	movl	%esp, %ebp
 	subl	$16, %esp
 	andl	$0xfffffff0, %esp
@@ -330,13 +335,13 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE:
 
 	.text
 .globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
 	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
 _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	pushl	%esi
 	pushl	%ebx
 
-	movdqa	mask, %xmm1
-	movdqa	mask+16, %xmm2
+	LOAD_MASK(movdqu,%xmm1,%xmm2)
 
 	movl	12(%esp), %ebx	/* source pointer */
 	movl	20(%esp), %edx	/* number of pixels to copy */
@@ -345,6 +350,9 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	movl	%ebx, %eax
 	movl	%edx, %esi
 
+	testl	%edx, %edx
+	jle	.L46		/* Bail if there's nothing to do. */
+
 	/* If the source pointer isn't a multiple of 16 we have to process
 	 * a few pixels the "slow" way to get the address aligned for
 	 * the SSE fetch intsructions.
@@ -426,7 +434,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	je	.L47
 
 	movq	(%ebx), %xmm0
-
+	addl	$8, %ebx
+        
 	movdqa	%xmm0, %xmm3
 	movdqa	%xmm0, %xmm4
 	andps	%xmm1, %xmm0
@@ -440,6 +449,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 	orps	%xmm3, %xmm0
 
 	movq	%xmm0, (%ecx)
+	addl	$8, %ecx        
 .L47:
 
 	testl	$1, %edx
@@ -455,52 +465,28 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2:
 
 
 
-	.section	.rodata
-
-	.align	16
-mask_565:
-	.word	0xf800
-	.word	0x07e0
-	.word	0x001f
-	.word	0x0000
-
-/* Setting SCALE_ADJUST to 5 gives a perfect match with the classic C
- * implementation in Mesa.  Setting SCALE_ADJUST to 0 is slightly faster but
- * at a small cost to accuracy.
+#define MASK_565_L	0x07e0f800	/* low dword: 16-bit words 0xf800, 0x07e0 */
+#define MASK_565_H	0x0000001f	/* high dword: 16-bit words 0x001f, 0x0000 */
+/* Setting SCALE_ADJUST to 5 gives a perfect match with the
+ * classic C implementation in Mesa.  Setting SCALE_ADJUST
+ * to 0 is slightly faster but at a small cost to accuracy.
  */
-
 #define SCALE_ADJUST	5
 #if SCALE_ADJUST == 5
-prescale:
-	.word	0x0001
-	.word	0x0010
-	.word	0x0200
-	.word	0x0000
-
-scale:
-	.word	0x20e8		/* (0x00ff0000 / 0x000007c0) + 1 */
-	.word	0x40c5		/* (0x00ff0000 / 0x000003f0) + 1 */
-	.word	0x839d		/* (0x00ff0000 / 0x000001f0) + 1 */
-	.word	0x0000
+#define PRESCALE_L 0x00100001	/* 16-bit words 0x0001, 0x0010 */
+#define PRESCALE_H 0x00000200	/* 16-bit words 0x0200, 0x0000 */
+#define SCALE_L 0x40C620E8	/* words 0x20e8, 0x40c6 -- NOTE(review): removed table had 0x40c5 for the second word; confirm intentional */
+#define SCALE_H 0x0000839d	/* 16-bit words 0x839d, 0x0000 */
 #elif SCALE_ADJUST == 0
-prescale:
-	.word	0x0001
-	.word	0x0020
-	.word	0x0800
-	.word	0x0000
-
-scale:
-	.word	0x0108		/* (0x00ff0000 / 0x0000f800) + 1 */
-	.word	0x0104		/* (0x00ff0000 / 0x0000fc00) + 1 */
-	.word	0x0108		/* (0x00ff0000 / 0x0000f800) + 1 */
-	.word	0x0000
+#define PRESCALE_L 0x00200001	/* 16-bit words 0x0001, 0x0020 */
+#define PRESCALE_H 0x00000800	/* 16-bit words 0x0800, 0x0000 */
+#define SCALE_L 0x01040108	/* 16-bit words 0x0108, 0x0104 */
+#define SCALE_H 0x00000108	/* 16-bit words 0x0108, 0x0000 */
 #else
 #error SCALE_ADJUST must either be 5 or 0.
 #endif
-
-
-alpha:	.long	0x00000000
-	.long	0x00ff0000
+#define ALPHA_L 0x00000000	/* low dword of the 64-bit alpha constant */
+#define ALPHA_H 0x00ff0000	/* high dword: 0x00ff in the topmost 16-bit word */
 
 /**
  * MMX optimized version of the RGB565 to RGBA copy routine.
@@ -508,6 +494,7 @@ alpha:	.long	0x00000000
 
 	.text
 	.globl	_generic_read_RGBA_span_RGB565_MMX
+        .hidden _generic_read_RGBA_span_RGB565_MMX
 	.type	_generic_read_RGBA_span_RGB565_MMX, @function
 
 _generic_read_RGBA_span_RGB565_MMX:
@@ -520,11 +507,22 @@ _generic_read_RGBA_span_RGB565_MMX:
 	movl	8(%esp), %edx	/* destination pointer */
 	movl	12(%esp), %ecx	/* number of pixels to copy */
 
-	movq	mask_565, %mm5
-	movq	prescale, %mm6
-	movq	scale, %mm7
-
-	shrl	$2, %ecx
+	pushl	$MASK_565_H		/* Build each 64-bit constant on the stack: */
+	pushl	$MASK_565_L		/* high dword first, so the low dword is at (%esp). */
+	movq	(%esp), %mm5		/* mm5 = RGB565 mask constant */
+	pushl	$PRESCALE_H
+	pushl	$PRESCALE_L
+	movq	(%esp), %mm6		/* mm6 = prescale constant */
+	pushl	$SCALE_H
+	pushl	$SCALE_L
+	movq	(%esp), %mm7		/* mm7 = scale constant (pmulhuw operand) */
+	pushl	$ALPHA_H
+	pushl	$ALPHA_L
+	movq	(%esp), %mm3		/* mm3 = alpha constant, OR'd in with por */
+	addl	$32,%esp		/* release the four 8-byte temporaries */
+
+	sarl	$2, %ecx		/* ecx = pixel count >> 2 (arithmetic) */
+	js	.L01		/* Bail early if the count is negative (js: OF is undefined after a multi-bit shift). */
 	jmp	.L02
 
 .L03:
@@ -570,8 +568,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	/* Always set the alpha value to 0xff.
 	 */
 
-	por	alpha, %mm0
-	por	alpha, %mm2
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
 
 
 	/* Pack the 16-bit values to 8-bit values and store the converted
@@ -582,8 +580,6 @@ _generic_read_RGBA_span_RGB565_MMX:
 	movq	%mm0, (%edx)
 	addl	$8, %edx
 
-
-
 	pshufw	$0xaa, %mm4, %mm0
 	pshufw	$0xff, %mm4, %mm2
 
@@ -598,8 +594,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	pmulhuw	%mm7, %mm0
 	pmulhuw	%mm7, %mm2
 
-	por	alpha, %mm0
-	por	alpha, %mm2
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
 
 	packuswb	%mm2, %mm0
 
@@ -636,8 +632,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	pmulhuw	%mm7, %mm0
 	pmulhuw	%mm7, %mm2
 
-	por	alpha, %mm0
-	por	alpha, %mm2
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
 
 	packuswb	%mm2, %mm0
 
@@ -652,7 +648,7 @@ _generic_read_RGBA_span_RGB565_MMX:
 	testl	$0x01, %ecx
 	je	.L01
 
-	movzxw	(%eax), %ecx
+	movzwl	(%eax), %ecx
 	movd	%ecx, %mm4
 
 	pshufw	$0x00, %mm4, %mm0
@@ -664,7 +660,7 @@ _generic_read_RGBA_span_RGB565_MMX:
 #endif
 	pmulhuw	%mm7, %mm0
 
-	por	alpha, %mm0
+ 	por %mm3, %mm0
 
 	packuswb	%mm0, %mm0
 
@@ -675,4 +671,8 @@ _generic_read_RGBA_span_RGB565_MMX:
 	emms
 #endif
 	ret
-#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) */
+#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
+	
+#if defined (__ELF__) && defined (__linux__)
+	.section .note.GNU-stack,"",%progbits
+#endif