X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fx86%2Fread_rgba_span_x86.S;h=80144b889c72e5b41c12c6934cfeed2eab5a7295;hb=ab9d1011f5549502a4b960c2067cde69856a2719;hp=960cffa50bc61909dc5871d91cc8cd86b1ec8295;hpb=e3358dea660f5dec53a8be9e38d725f4fd829e14;p=mesa.git diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S index 960cffa50bc..80144b889c7 100644 --- a/src/mesa/x86/read_rgba_span_x86.S +++ b/src/mesa/x86/read_rgba_span_x86.S @@ -32,22 +32,23 @@ .file "read_rgba_span_x86.S" #if !defined(__DJGPP__) && !defined(__MINGW32__) /* this one cries for assyntax.h */ - .section .rodata - .align 16 - .type mask, @object - .size mask, 32 -mask: - .long 0xff00ff00 - .long 0xff00ff00 - .long 0xff00ff00 - .long 0xff00ff00 - .long 0x00ff0000 - .long 0x00ff0000 - .long 0x00ff0000 - .long 0x00ff0000 - - -/* I implemented these as macros because the appear in quite a few places, +/* Kevin F. Quinn 2nd July 2006 + * Replaced data segment constants with text-segment instructions. + */ +#define LOAD_MASK(mvins,m1,m2) \ + pushl $0xff00ff00 ;\ + pushl $0xff00ff00 ;\ + pushl $0xff00ff00 ;\ + pushl $0xff00ff00 ;\ + mvins (%esp), m1 ;\ + pushl $0x00ff0000 ;\ + pushl $0x00ff0000 ;\ + pushl $0x00ff0000 ;\ + pushl $0x00ff0000 ;\ + mvins (%esp), m2 ;\ + addl $32, %esp + +/* I implemented these as macros because they appear in several places, * and I've tweaked them a number of times. I got tired of changing every * place they appear. :) */ @@ -84,8 +85,7 @@ _generic_read_RGBA_span_BGRA8888_REV_MMX: #ifdef USE_INNER_EMMS emms #endif - movq mask, %mm1 - movq mask+16, %mm2 + LOAD_MASK(movq,%mm1,%mm2) movl 8(%esp), %ebx /* source pointer */ movl 16(%esp), %edx /* number of pixels to copy */ @@ -182,8 +182,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE: #ifdef USE_INNER_EMMS emms #endif - movq mask, %mm1 - movq mask+16, %mm2 + + LOAD_MASK(movq,%mm1,%mm2) movl 16(%esp), %ebx /* source pointer */ movl 24(%esp), %edx /* number of pixels to copy */ @@ -341,8 +341,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: pushl %esi pushl %ebx - movdqa mask, %xmm1 - movdqa mask+16, %xmm2 + LOAD_MASK(movdqu,%xmm1,%xmm2) movl 12(%esp), %ebx /* source pointer */ movl 20(%esp), %edx /* number of pixels to copy */ @@ -435,7 +434,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: je .L47 movq (%ebx), %xmm0 - + addl $8, %ebx + movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm4 andps %xmm1, %xmm0 @@ -449,6 +449,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: orps %xmm3, %xmm0 movq %xmm0, (%ecx) + addl $8, %ecx .L47: testl $1, %edx @@ -464,52 +465,28 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: - .section .rodata - - .align 16 -mask_565: - .word 0xf800 - .word 0x07e0 - .word 0x001f - .word 0x0000 - -/* Setting SCALE_ADJUST to 5 gives a perfect match with the classic C - * implementation in Mesa. Setting SCALE_ADJUST to 0 is slightly faster but - * at a small cost to accuracy. +#define MASK_565_L 0x07e0f800 +#define MASK_565_H 0x0000001f +/* Setting SCALE_ADJUST to 5 gives a perfect match with the + * classic C implementation in Mesa. Setting SCALE_ADJUST + * to 0 is slightly faster but at a small cost to accuracy. */ - #define SCALE_ADJUST 5 #if SCALE_ADJUST == 5 -prescale: - .word 0x0001 - .word 0x0010 - .word 0x0200 - .word 0x0000 - -scale: - .word 0x20e8 /* (0x00ff0000 / 0x000007c0) + 1 */ - .word 0x40c5 /* (0x00ff0000 / 0x000003f0) + 1 */ - .word 0x839d /* (0x00ff0000 / 0x000001f0) + 1 */ - .word 0x0000 +#define PRESCALE_L 0x00100001 +#define PRESCALE_H 0x00000200 +#define SCALE_L 0x40C620E8 +#define SCALE_H 0x0000839d #elif SCALE_ADJUST == 0 -prescale: - .word 0x0001 - .word 0x0020 - .word 0x0800 - .word 0x0000 - -scale: - .word 0x0108 /* (0x00ff0000 / 0x0000f800) + 1 */ - .word 0x0104 /* (0x00ff0000 / 0x0000fc00) + 1 */ - .word 0x0108 /* (0x00ff0000 / 0x0000f800) + 1 */ - .word 0x0000 +#define PRESCALE_L 0x00200001 +#define PRESCALE_H 0x00000800 +#define SCALE_L 0x01040108 +#define SCALE_H 0x00000108 #else #error SCALE_ADJUST must either be 5 or 0. #endif - - -alpha: .long 0x00000000 - .long 0x00ff0000 +#define ALPHA_L 0x00000000 +#define ALPHA_H 0x00ff0000 /** * MMX optimized version of the RGB565 to RGBA copy routine. @@ -530,9 +507,19 @@ _generic_read_RGBA_span_RGB565_MMX: movl 8(%esp), %edx /* destination pointer */ movl 12(%esp), %ecx /* number of pixels to copy */ - movq mask_565, %mm5 - movq prescale, %mm6 - movq scale, %mm7 + pushl $MASK_565_H + pushl $MASK_565_L + movq (%esp), %mm5 + pushl $PRESCALE_H + pushl $PRESCALE_L + movq (%esp), %mm6 + pushl $SCALE_H + pushl $SCALE_L + movq (%esp), %mm7 + pushl $ALPHA_H + pushl $ALPHA_L + movq (%esp), %mm3 + addl $32,%esp sarl $2, %ecx jle .L01 /* Bail early if the count is negative. */ @@ -581,8 +568,8 @@ _generic_read_RGBA_span_RGB565_MMX: /* Always set the alpha value to 0xff. */ - por alpha, %mm0 - por alpha, %mm2 + por %mm3, %mm0 + por %mm3, %mm2 /* Pack the 16-bit values to 8-bit values and store the converted @@ -593,8 +580,6 @@ _generic_read_RGBA_span_RGB565_MMX: movq %mm0, (%edx) addl $8, %edx - - pshufw $0xaa, %mm4, %mm0 pshufw $0xff, %mm4, %mm2 @@ -609,8 +594,8 @@ _generic_read_RGBA_span_RGB565_MMX: pmulhuw %mm7, %mm0 pmulhuw %mm7, %mm2 - por alpha, %mm0 - por alpha, %mm2 + por %mm3, %mm0 + por %mm3, %mm2 packuswb %mm2, %mm0 @@ -647,8 +632,8 @@ _generic_read_RGBA_span_RGB565_MMX: pmulhuw %mm7, %mm0 pmulhuw %mm7, %mm2 - por alpha, %mm0 - por alpha, %mm2 + por %mm3, %mm0 + por %mm3, %mm2 packuswb %mm2, %mm0 @@ -675,7 +660,7 @@ _generic_read_RGBA_span_RGB565_MMX: #endif pmulhuw %mm7, %mm0 - por alpha, %mm0 + por %mm3, %mm0 packuswb %mm0, %mm0