X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fx86%2Fread_rgba_span_x86.S;h=817729973713d41febd41f57844bc250c74146ba;hb=15da8d076110c39d3ce34ac45edf0f3c9ab13b7f;hp=0bade675a58fb98dbd8d9255d10a6f1df4005bbd;hpb=ea3885812704645944752887d892c38a46710956;p=mesa.git diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S index 0bade675a58..81772997371 100644 --- a/src/mesa/x86/read_rgba_span_x86.S +++ b/src/mesa/x86/read_rgba_span_x86.S @@ -31,23 +31,24 @@ */ .file "read_rgba_span_x86.S" -#if !defined(__DJGPP__) && !defined(__MINGW32__) /* this one cries for assyntax.h */ - .section .rodata - .align 16 - .type mask, @object - .size mask, 32 -mask: - .long 0xff00ff00 - .long 0xff00ff00 - .long 0xff00ff00 - .long 0xff00ff00 - .long 0x00ff0000 - .long 0x00ff0000 - .long 0x00ff0000 - .long 0x00ff0000 - - -/* I implemented these as macros because the appear in quite a few places, +#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */ +/* Kevin F. Quinn 2nd July 2006 + * Replaced data segment constants with text-segment instructions. + */ +#define LOAD_MASK(mvins,m1,m2) \ + pushl $0xff00ff00 ;\ + pushl $0xff00ff00 ;\ + pushl $0xff00ff00 ;\ + pushl $0xff00ff00 ;\ + mvins (%esp), m1 ;\ + pushl $0x00ff0000 ;\ + pushl $0x00ff0000 ;\ + pushl $0x00ff0000 ;\ + pushl $0x00ff0000 ;\ + mvins (%esp), m2 ;\ + addl $32, %esp + +/* I implemented these as macros because they appear in several places, * and I've tweaked them a number of times. I got tired of changing every * place they appear. :) */ @@ -76,6 +77,7 @@ mask: */ .globl _generic_read_RGBA_span_BGRA8888_REV_MMX +.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX .type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function _generic_read_RGBA_span_BGRA8888_REV_MMX: pushl %ebx @@ -83,15 +85,14 @@ _generic_read_RGBA_span_BGRA8888_REV_MMX: #ifdef USE_INNER_EMMS emms #endif - movq mask, %mm1 - movq mask+16, %mm2 + LOAD_MASK(movq,%mm1,%mm2) movl 8(%esp), %ebx /* source pointer */ movl 16(%esp), %edx /* number of pixels to copy */ movl 12(%esp), %ecx /* destination pointer */ testl %edx, %edx - je .L20 /* Bail if there's nothing to do. */ + jle .L20 /* Bail if there's nothing to do. */ movl %ebx, %eax @@ -171,6 +172,7 @@ _generic_read_RGBA_span_BGRA8888_REV_MMX: */ .globl _generic_read_RGBA_span_BGRA8888_REV_SSE +.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE .type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function _generic_read_RGBA_span_BGRA8888_REV_SSE: pushl %esi @@ -180,13 +182,16 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE: #ifdef USE_INNER_EMMS emms #endif - movq mask, %mm1 - movq mask+16, %mm2 + + LOAD_MASK(movq,%mm1,%mm2) movl 16(%esp), %ebx /* source pointer */ movl 24(%esp), %edx /* number of pixels to copy */ movl 20(%esp), %ecx /* destination pointer */ + testl %edx, %edx + jle .L35 /* Bail if there's nothing to do. */ + movl %esp, %ebp subl $16, %esp andl $0xfffffff0, %esp @@ -330,13 +335,13 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE: .text .globl _generic_read_RGBA_span_BGRA8888_REV_SSE2 +.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2 .type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function _generic_read_RGBA_span_BGRA8888_REV_SSE2: pushl %esi pushl %ebx - movdqa mask, %xmm1 - movdqa mask+16, %xmm2 + LOAD_MASK(movdqu,%xmm1,%xmm2) movl 12(%esp), %ebx /* source pointer */ movl 20(%esp), %edx /* number of pixels to copy */ @@ -345,6 +350,9 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: movl %ebx, %eax movl %edx, %esi + testl %edx, %edx + jle .L46 /* Bail if there's nothing to do. */ + /* If the source pointer isn't a multiple of 16 we have to process * a few pixels the "slow" way to get the address aligned for * the SSE fetch intsructions. @@ -426,7 +434,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: je .L47 movq (%ebx), %xmm0 - + addl $8, %ebx + movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm4 andps %xmm1, %xmm0 @@ -440,6 +449,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: orps %xmm3, %xmm0 movq %xmm0, (%ecx) + addl $8, %ecx .L47: testl $1, %edx @@ -455,52 +465,28 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: - .section .rodata - - .align 16 -mask_565: - .word 0xf800 - .word 0x07e0 - .word 0x001f - .word 0x0000 - -/* Setting SCALE_ADJUST to 5 gives a perfect match with the classic C - * implementation in Mesa. Setting SCALE_ADJUST to 0 is slightly faster but - * at a small cost to accuracy. +#define MASK_565_L 0x07e0f800 +#define MASK_565_H 0x0000001f +/* Setting SCALE_ADJUST to 5 gives a perfect match with the + * classic C implementation in Mesa. Setting SCALE_ADJUST + * to 0 is slightly faster but at a small cost to accuracy. */ - #define SCALE_ADJUST 5 #if SCALE_ADJUST == 5 -prescale: - .word 0x0001 - .word 0x0010 - .word 0x0200 - .word 0x0000 - -scale: - .word 0x20e8 /* (0x00ff0000 / 0x000007c0) + 1 */ - .word 0x40c5 /* (0x00ff0000 / 0x000003f0) + 1 */ - .word 0x839d /* (0x00ff0000 / 0x000001f0) + 1 */ - .word 0x0000 +#define PRESCALE_L 0x00100001 +#define PRESCALE_H 0x00000200 +#define SCALE_L 0x40C620E8 +#define SCALE_H 0x0000839d #elif SCALE_ADJUST == 0 -prescale: - .word 0x0001 - .word 0x0020 - .word 0x0800 - .word 0x0000 - -scale: - .word 0x0108 /* (0x00ff0000 / 0x0000f800) + 1 */ - .word 0x0104 /* (0x00ff0000 / 0x0000fc00) + 1 */ - .word 0x0108 /* (0x00ff0000 / 0x0000f800) + 1 */ - .word 0x0000 +#define PRESCALE_L 0x00200001 +#define PRESCALE_H 0x00000800 +#define SCALE_L 0x01040108 +#define SCALE_H 0x00000108 #else #error SCALE_ADJUST must either be 5 or 0. #endif - - -alpha: .long 0x00000000 - .long 0x00ff0000 +#define ALPHA_L 0x00000000 +#define ALPHA_H 0x00ff0000 /** * MMX optimized version of the RGB565 to RGBA copy routine. @@ -508,6 +494,7 @@ alpha: .long 0x00000000 .text .globl _generic_read_RGBA_span_RGB565_MMX + .hidden _generic_read_RGBA_span_RGB565_MMX .type _generic_read_RGBA_span_RGB565_MMX, @function _generic_read_RGBA_span_RGB565_MMX: @@ -520,11 +507,22 @@ _generic_read_RGBA_span_RGB565_MMX: movl 8(%esp), %edx /* destination pointer */ movl 12(%esp), %ecx /* number of pixels to copy */ - movq mask_565, %mm5 - movq prescale, %mm6 - movq scale, %mm7 - - shrl $2, %ecx + pushl $MASK_565_H + pushl $MASK_565_L + movq (%esp), %mm5 + pushl $PRESCALE_H + pushl $PRESCALE_L + movq (%esp), %mm6 + pushl $SCALE_H + pushl $SCALE_L + movq (%esp), %mm7 + pushl $ALPHA_H + pushl $ALPHA_L + movq (%esp), %mm3 + addl $32,%esp + + sarl $2, %ecx + jl .L01 /* Bail early if the count is negative. */ jmp .L02 .L03: @@ -570,8 +568,8 @@ _generic_read_RGBA_span_RGB565_MMX: /* Always set the alpha value to 0xff. */ - por alpha, %mm0 - por alpha, %mm2 + por %mm3, %mm0 + por %mm3, %mm2 /* Pack the 16-bit values to 8-bit values and store the converted @@ -582,8 +580,6 @@ _generic_read_RGBA_span_RGB565_MMX: movq %mm0, (%edx) addl $8, %edx - - pshufw $0xaa, %mm4, %mm0 pshufw $0xff, %mm4, %mm2 @@ -598,8 +594,8 @@ _generic_read_RGBA_span_RGB565_MMX: pmulhuw %mm7, %mm0 pmulhuw %mm7, %mm2 - por alpha, %mm0 - por alpha, %mm2 + por %mm3, %mm0 + por %mm3, %mm2 packuswb %mm2, %mm0 @@ -636,8 +632,8 @@ _generic_read_RGBA_span_RGB565_MMX: pmulhuw %mm7, %mm0 pmulhuw %mm7, %mm2 - por alpha, %mm0 - por alpha, %mm2 + por %mm3, %mm0 + por %mm3, %mm2 packuswb %mm2, %mm0 @@ -652,7 +648,7 @@ _generic_read_RGBA_span_RGB565_MMX: testl $0x01, %ecx je .L01 - movzxw (%eax), %ecx + movzwl (%eax), %ecx movd %ecx, %mm4 pshufw $0x00, %mm4, %mm0 @@ -664,7 +660,7 @@ _generic_read_RGBA_span_RGB565_MMX: #endif pmulhuw %mm7, %mm0 - por alpha, %mm0 + por %mm3, %mm0 packuswb %mm0, %mm0 @@ -675,4 +671,8 @@ _generic_read_RGBA_span_RGB565_MMX: emms #endif ret -#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) */ +#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */ + +#if defined (__ELF__) && defined (__linux__) + .section .note.GNU-stack,"",%progbits +#endif