2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
29 * Template file of span read / write functions.
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
36 #include "main/colormac.h"
37 #include "spantmp_common.h"
// Fall back to the generic HW_CLIPLOOP() for reads when no read-specific
// clip loop has been defined before this point.
43 #ifndef HW_READ_CLIPLOOP
44 #define HW_READ_CLIPLOOP() HW_CLIPLOOP()
// Same fallback for writes.
47 #ifndef HW_WRITE_CLIPLOOP
48 #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP()
// RGB 5-6-5 variant: selected when SPANTMP_PIXEL_FMT is GL_RGB and
// SPANTMP_PIXEL_TYPE is GL_UNSIGNED_SHORT_5_6_5.
51 #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
54 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
// Address of pixel (_x, _y): x is scaled by 2 and y by pitch. buf and pitch
// are not defined here and must be in scope wherever the macro is expanded.
59 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
// Load and store one pixel as a volatile GLushort at GET_PTR(_x, _y).
62 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
63 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
64 #endif /* GET_VALUE */
// Pack color[0], color[1], color[2] into p with PACK_COLOR_565 (defined
// elsewhere, presumably in main/colormac.h - TODO confirm).
66 #define INIT_MONO_PIXEL(p, color) \
67 p = PACK_COLOR_565( color[0], color[1], color[2] )
/*
 * Pack 8-bit r, g, b into one 5-6-5 value and store it at (_x, _y) through
 * PUT_VALUE: the top 5 bits of r land in bits 11..15, the top 6 bits of g in
 * bits 5..10 and the top 5 bits of b in bits 0..4.  The a argument is
 * accepted but not used.
 *
 * Every colour argument is parenthesised before the cast so that a compound
 * argument (for example a conditional expression) is masked as a whole, and
 * the last line carries no continuation backslash so the line that follows
 * can never be spliced into this definition.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, ((((int)(r) & 0xf8) << 8) | \
                      (((int)(g) & 0xfc) << 3) | \
                      (((int)(b) & 0xf8) >> 3)))
74 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
// WRITE_PIXEL above stores an already packed value p through PUT_VALUE.
// READ_RGBA below loads one GLushort with GET_VALUE and fills rgba[0..2]:
// each field is masked in place (0xf8, 0xfc, 0xf8) and rescaled with
// * 255 / mask, so the largest field value maps to 255.
// NOTE(review): no store to rgba[3] and no enclosing statement wrapper are
// visible in this excerpt.
76 #define READ_RGBA( rgba, _x, _y ) \
78 GLushort p = GET_VALUE(_x, _y); \
79 rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \
80 rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \
81 rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
85 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
// BGRA 8-8-8-8 reversed variant: selected by the condition on the line above.
88 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
// Address of pixel (_x, _y): x is scaled by 4 and y by pitch. buf and pitch
// must be in scope wherever the macro is expanded.
93 #define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch)
// Load and store one pixel as a volatile GLuint at GET_PTR(_x, _y).
96 #define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
97 #define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
98 #endif /* GET_VALUE */
// Pack color into p via PACK_COLOR_8888, passing color[3] first and then
// color[0], color[1], color[2].
100 # define INIT_MONO_PIXEL(p, color) \
101 p = PACK_COLOR_8888(color[3], color[0], color[1], color[2])
// WRITE_RGBA stores the components as one value through PUT_VALUE; r is
// shifted left by 16.
// NOTE(review): the remaining terms of the expression are not visible in
// this excerpt. WRITE_PIXEL, which follows, forwards a packed value p to
// PUT_VALUE.
103 # define WRITE_RGBA(_x, _y, r, g, b, a) \
104 PUT_VALUE(_x, _y, ((r << 16) | \
109 #define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
// Three READ_RGBA definitions follow; each loads a GLuint with GET_VALUE.
//  - USE_X86_ASM: applies bswap and then a rotate right by 8 to the value
//    and stores it through rgba as a single GLuint.
//  - MESA_BIG_ENDIAN: stores (t >> 24) | (p << 8) as a single uint32_t.
//    NOTE(review): the definition of t is not visible in this excerpt.
//  - third definition (its guard line is not visible here): stores
//    (p >> 16), (p >> 8), (p >> 0) and (p >> 24), each masked with 0xff,
//    into rgba[0], rgba[1], rgba[2] and rgba[3] respectively.
111 # if defined( USE_X86_ASM )
112 # define READ_RGBA(rgba, _x, _y) \
114 GLuint p = GET_VALUE(_x, _y); \
115 __asm__ __volatile__( "bswap %0; rorl $8, %0" \
116 : "=r" (p) : "0" (p) ); \
117 ((GLuint *)rgba)[0] = p; \
119 # elif defined( MESA_BIG_ENDIAN )
120 /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
121 * rotlwi instruction. It also produces good code on SPARC.
123 # define READ_RGBA( rgba, _x, _y ) \
125 GLuint p = GET_VALUE(_x, _y); \
127 *((uint32_t *) rgba) = (t >> 24) | (p << 8); \
130 # define READ_RGBA( rgba, _x, _y ) \
132 GLuint p = GET_VALUE(_x, _y); \
133 rgba[0] = (p >> 16) & 0xff; \
134 rgba[1] = (p >> 8) & 0xff; \
135 rgba[2] = (p >> 0) & 0xff; \
136 rgba[3] = (p >> 24) & 0xff; \
141 #error SPANTMP_PIXEL_FMT must be set to a valid value!
147 ** Assembly routines.
150 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
151 #include "x86/read_rgba_span_x86.h"
152 #include "x86/common_x86_asm.h"
// TAG(WriteRGBASpan): span writer for RGBA data.
//   values - reinterpreted below as an array of 4-byte tuples (rgba)
//   mask   - byte array parameter; NOTE(review): its use is not visible in
//            this excerpt
// NOTE(review): braces, local declarations and several statements are absent
// from this excerpt, so only the visible steps are described.
155 static void TAG(WriteRGBASpan
)( GLcontext
*ctx
,
156 struct gl_renderbuffer
*rb
,
157 GLuint n
, GLint x
, GLint y
,
158 const void *values
, const GLubyte mask
[] )
162 const GLubyte (*rgba
)[4] = (const GLubyte (*)[4]) values
;
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
172 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// Optional trace of i, n1 and x1 when DBG is non-zero.
174 if (DBG
) fprintf(stderr
, "WriteRGBASpan %d..%d (x1 %d)\n",
175 (int)i
, (int)n1
, (int)x1
);
// Two loops follow. Each runs while n1 > 0, stepping the source index i and
// the column x1 together, and passes the four components of rgba[i] to a
// call whose name is not visible here (presumably WRITE_RGBA - TODO confirm).
179 for (;n1
>0;i
++,x1
++,n1
--)
182 rgba
[i
][0], rgba
[i
][1],
183 rgba
[i
][2], rgba
[i
][3] );
187 for (;n1
>0;i
++,x1
++,n1
--)
189 rgba
[i
][0], rgba
[i
][1],
190 rgba
[i
][2], rgba
[i
][3] );
// TAG(WriteRGBSpan): span writer for RGB data without an alpha channel.
//   values - reinterpreted below as an array of 3-byte tuples (rgb)
//   mask   - byte array parameter; NOTE(review): its use is not visible in
//            this excerpt
// NOTE(review): braces and local declarations are absent from this excerpt.
198 static void TAG(WriteRGBSpan
)( GLcontext
*ctx
,
199 struct gl_renderbuffer
*rb
,
200 GLuint n
, GLint x
, GLint y
,
201 const void *values
, const GLubyte mask
[] )
205 const GLubyte (*rgb
)[3] = (const GLubyte (*)[3]) values
;
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
215 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// Optional trace of i, n1 and x1 when DBG is non-zero.
217 if (DBG
) fprintf(stderr
, "WriteRGBSpan %d..%d (x1 %d)\n",
218 (int)i
, (int)n1
, (int)x1
);
// Two loops follow. Each runs while n1 > 0, stepping i and x1 together and
// writing rgb[i] at (x1, y) through WRITE_RGBA with a constant alpha of 255.
222 for (;n1
>0;i
++,x1
++,n1
--)
224 WRITE_RGBA( x1
, y
, rgb
[i
][0], rgb
[i
][1], rgb
[i
][2], 255 );
228 for (;n1
>0;i
++,x1
++,n1
--)
229 WRITE_RGBA( x1
, y
, rgb
[i
][0], rgb
[i
][1], rgb
[i
][2], 255 );
// TAG(WriteRGBAPixels): writes RGBA values at scattered positions given by
// the parallel arrays x[] and y[].
//   values - reinterpreted below as an array of 4-byte tuples (rgba)
//   mask   - byte array parameter; NOTE(review): its use is not visible in
//            this excerpt
// NOTE(review): the loops and braces around the two per-pixel sequences are
// absent from this excerpt.
237 static void TAG(WriteRGBAPixels
)( GLcontext
*ctx
,
238 struct gl_renderbuffer
*rb
,
239 GLuint n
, const GLint x
[], const GLint y
[],
240 const void *values
, const GLubyte mask
[] )
244 const GLubyte (*rgba
)[4] = (const GLubyte (*)[4]) values
;
// Optional trace when DBG is non-zero.
248 if (DBG
) fprintf(stderr
, "WriteRGBAPixels\n");
// Per-pixel step (appears twice): map y[i] through Y_FLIP, and write
// rgba[i] with WRITE_RGBA only when CLIPPIXEL accepts (x[i], fy).
257 const int fy
= Y_FLIP(y
[i
]);
258 if (CLIPPIXEL(x
[i
],fy
))
259 WRITE_RGBA( x
[i
], fy
,
260 rgba
[i
][0], rgba
[i
][1],
261 rgba
[i
][2], rgba
[i
][3] );
269 const int fy
= Y_FLIP(y
[i
]);
270 if (CLIPPIXEL(x
[i
],fy
))
271 WRITE_RGBA( x
[i
], fy
,
272 rgba
[i
][0], rgba
[i
][1],
273 rgba
[i
][2], rgba
[i
][3] );
// TAG(WriteMonoRGBASpan): span writer that stores one colour repeatedly.
//   value - reinterpreted below as a GLubyte array (color)
//   mask  - byte array parameter; NOTE(review): its use is not visible in
//           this excerpt
// NOTE(review): braces and local declarations (including p) are absent from
// this excerpt.
283 static void TAG(WriteMonoRGBASpan
)( GLcontext
*ctx
,
284 struct gl_renderbuffer
*rb
,
285 GLuint n
, GLint x
, GLint y
,
286 const void *value
, const GLubyte mask
[] )
290 const GLubyte
*color
= (const GLubyte
*) value
;
// Pack the colour once into p; the loops below reuse the packed value.
294 INIT_MONO_PIXEL(p
, color
);
// Optional trace when DBG is non-zero.
298 if (DBG
) fprintf(stderr
, "WriteMonoRGBASpan\n");
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
303 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// Two loops follow. Each runs while n1 > 0, stepping i and x1 together and
// storing p at (x1, y) through WRITE_PIXEL.
306 for (;n1
>0;i
++,x1
++,n1
--)
308 WRITE_PIXEL( x1
, y
, p
);
312 for (;n1
>0;i
++,x1
++,n1
--)
313 WRITE_PIXEL( x1
, y
, p
);
// TAG(WriteMonoRGBAPixels): stores one colour at scattered positions given
// by the parallel arrays x[] and y[].
// NOTE(review): parts of the parameter list (the declaration of value, used
// below, among them), the loops, braces and the declaration of p are absent
// from this excerpt.
322 static void TAG(WriteMonoRGBAPixels
)( GLcontext
*ctx
,
323 struct gl_renderbuffer
*rb
,
325 const GLint x
[], const GLint y
[],
327 const GLubyte mask
[] )
331 const GLubyte
*color
= (const GLubyte
*) value
;
// Pack the colour once into p; the per-pixel steps below reuse it.
334 INIT_MONO_PIXEL(p
, color
);
// Optional trace when DBG is non-zero.
336 if (DBG
) fprintf(stderr
, "WriteMonoRGBAPixels\n");
// Per-pixel step (appears twice): map y[i] through Y_FLIP, and store p with
// WRITE_PIXEL only when CLIPPIXEL accepts (x[i], fy).
344 int fy
= Y_FLIP(y
[i
]);
345 if (CLIPPIXEL( x
[i
], fy
))
346 WRITE_PIXEL( x
[i
], fy
, p
);
352 int fy
= Y_FLIP(y
[i
]);
353 if (CLIPPIXEL( x
[i
], fy
))
354 WRITE_PIXEL( x
[i
], fy
, p
);
// TAG(ReadRGBASpan): plain C span reader.
//   values - output buffer, reinterpreted below as an array of 4-byte
//            tuples (rgba)
// NOTE(review): braces and local declarations are absent from this excerpt.
364 static void TAG(ReadRGBASpan
)( GLcontext
*ctx
,
365 struct gl_renderbuffer
*rb
,
366 GLuint n
, GLint x
, GLint y
, void *values
)
370 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
// Optional trace when DBG is non-zero.
376 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
381 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// Runs while n1 > 0, stepping i and x1 together and filling rgba[i] from
// the pixel at (x1, y) through READ_RGBA.
382 for (;n1
>0;i
++,x1
++,n1
--)
383 READ_RGBA( rgba
[i
], x1
, y
);
// TAG2(ReadRGBASpan,_MMX): span reader that hands the clipped run to an MMX
// routine. Compiled only when GET_PTR and USE_MMX_ASM are defined and the
// format is either BGRA 8-8-8-8 reversed or RGB 5-6-5.
// NOTE(review): braces and local declarations are absent from this excerpt.
391 #if defined(GET_PTR) && \
392 defined(USE_MMX_ASM) && \
393 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
394 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
395 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
396 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
397 static void TAG2(ReadRGBASpan
,_MMX
)( GLcontext
*ctx
,
398 struct gl_renderbuffer
*rb
,
399 GLuint n
, GLint x
, GLint y
, void *values
)
// An emms instruction is issued here unless USE_INNER_EMMS is defined.
401 #ifndef USE_INNER_EMMS
402 /* The EMMS instruction is directly in-lined here because using GCC's
403 * built-in _mm_empty function was found to utterly destroy performance.
405 __asm__
__volatile__( "emms" );
410 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
// Optional trace when DBG is non-zero; the text matches the C variant.
416 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
421 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// src is the address of the pixel at (x1, y); n1 pixels are converted from
// there into rgba[i] onward by the routine matching the pixel format.
424 const void * src
= GET_PTR( x1
, y
);
425 #if (SPANTMP_PIXEL_FMT == GL_RGB) && \
426 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
427 _generic_read_RGBA_span_RGB565_MMX( src
, rgba
[i
], n1
);
429 _generic_read_RGBA_span_BGRA8888_REV_MMX( src
, rgba
[i
], n1
);
// A second emms is issued under the same USE_INNER_EMMS condition.
436 #ifndef USE_INNER_EMMS
437 __asm__
__volatile__( "emms" );
// TAG2(ReadRGBASpan,_SSE2): span reader that hands the clipped run to an
// SSE2 routine. Compiled only when GET_PTR and USE_SSE_ASM are defined and
// the format is BGRA 8-8-8-8 reversed.
// NOTE(review): the end of the parameter list (the declaration of values,
// used below), braces and local declarations are absent from this excerpt.
443 #if defined(GET_PTR) && \
444 defined(USE_SSE_ASM) && \
445 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
446 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
447 static void TAG2(ReadRGBASpan
,_SSE2
)( GLcontext
*ctx
,
448 struct gl_renderbuffer
*rb
,
449 GLuint n
, GLint x
, GLint y
,
454 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
// Optional trace when DBG is non-zero; the text matches the C variant.
460 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
465 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// src is the address of the pixel at (x1, y); n1 pixels are converted from
// there into rgba[i] onward.
468 const void * src
= GET_PTR( x1
, y
);
469 _generic_read_RGBA_span_BGRA8888_REV_SSE2( src
, rgba
[i
], n1
);
// TAG2(ReadRGBASpan,_SSE): span reader that hands the clipped run to an SSE
// routine. Compiled only when GET_PTR and USE_SSE_ASM are defined and the
// format is BGRA 8-8-8-8 reversed.
// NOTE(review): the end of the parameter list (the declaration of values,
// used below), braces and local declarations are absent from this excerpt.
478 #if defined(GET_PTR) && \
479 defined(USE_SSE_ASM) && \
480 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
481 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
482 static void TAG2(ReadRGBASpan
,_SSE
)( GLcontext
*ctx
,
483 struct gl_renderbuffer
*rb
,
484 GLuint n
, GLint x
, GLint y
,
// An emms instruction is issued here unless USE_INNER_EMMS is defined.
487 #ifndef USE_INNER_EMMS
488 /* The EMMS instruction is directly in-lined here because using GCC's
489 * built-in _mm_empty function was found to utterly destroy performance.
491 __asm__
__volatile__( "emms" );
496 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
// Optional trace when DBG is non-zero; the text matches the C variant.
502 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
// CLIPSPAN (defined elsewhere) is handed x, y, n and the locals x1, n1, i.
507 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
// src is the address of the pixel at (x1, y); n1 pixels are converted from
// there into rgba[i] onward.
510 const void * src
= GET_PTR( x1
, y
);
511 _generic_read_RGBA_span_BGRA8888_REV_SSE( src
, rgba
[i
], n1
);
// A second emms is issued under the same USE_INNER_EMMS condition.
517 #ifndef USE_INNER_EMMS
518 __asm__
__volatile__( "emms" );
// TAG(ReadRGBAPixels): reads pixels at scattered positions given by the
// parallel arrays x[] and y[] into rgba.
// NOTE(review): the end of the parameter list (the declaration of values,
// used below), the loops and braces are absent from this excerpt.
524 static void TAG(ReadRGBAPixels
)( GLcontext
*ctx
,
525 struct gl_renderbuffer
*rb
,
526 GLuint n
, const GLint x
[], const GLint y
[],
531 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
// mask is a local fixed to NULL here, not a parameter.
532 GLubyte
*mask
= NULL
; /* remove someday */
// Optional trace when DBG is non-zero.
536 if (DBG
) fprintf(stderr
, "ReadRGBAPixels\n");
// Per-pixel step (appears twice): map y[i] through Y_FLIP, and fill rgba[i]
// with READ_RGBA only when CLIPPIXEL accepts (x[i], fy).
544 int fy
= Y_FLIP( y
[i
] );
545 if (CLIPPIXEL( x
[i
], fy
))
546 READ_RGBA( rgba
[i
], x
[i
], fy
);
552 int fy
= Y_FLIP( y
[i
] );
553 if (CLIPPIXEL( x
[i
], fy
))
554 READ_RGBA( rgba
[i
], x
[i
], fy
);
// TAG(InitPointers): installs the span functions generated by this template
// into the renderbuffer rb. The Put* entries and GetValues are assigned in
// sequence; GetRow is then assigned from one of the SSE2, SSE, MMX or plain
// C readers.
// NOTE(review): braces and some branch conditions are absent from this
// excerpt, so the exact control flow between the GetRow assignments cannot
// be confirmed here.
563 static void TAG(InitPointers
)(struct gl_renderbuffer
*rb
)
565 rb
->PutRow
= TAG(WriteRGBASpan
);
566 rb
->PutRowRGB
= TAG(WriteRGBSpan
);
567 rb
->PutMonoRow
= TAG(WriteMonoRGBASpan
);
568 rb
->PutValues
= TAG(WriteRGBAPixels
);
569 rb
->PutMonoValues
= TAG(WriteMonoRGBAPixels
);
570 rb
->GetValues
= TAG(ReadRGBAPixels
);
// SSE2 reader: compiled in only for USE_SSE_ASM with the BGRA 8-8-8-8
// reversed format, and chosen when cpu_has_xmm2 is non-zero.
573 #if defined(USE_SSE_ASM) && \
574 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
575 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
576 if ( cpu_has_xmm2
) {
577 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "SSE2" );
578 rb
->GetRow
= TAG2(ReadRGBASpan
, _SSE2
);
// SSE reader: same compile-time guard.
// NOTE(review): the run-time condition for this branch is not visible here.
582 #if defined(USE_SSE_ASM) && \
583 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
584 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
586 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "SSE" );
587 rb
->GetRow
= TAG2(ReadRGBASpan
, _SSE
);
// MMX reader: compiled in for USE_MMX_ASM with either supported format.
// NOTE(review): the run-time condition for this branch is not visible here.
591 #if defined(USE_MMX_ASM) && \
592 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
593 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
594 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
595 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
597 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "MMX" );
598 rb
->GetRow
= TAG2(ReadRGBASpan
, _MMX
);
// Plain C reader (presumably the fallback when no assembly variant was
// chosen - TODO confirm against the complete file).
604 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "C" );
605 rb
->GetRow
= TAG(ReadRGBASpan
);
// Undefine the per-format helper macro and the two template parameters.
// Presumably this lets the file be included again with a different
// SPANTMP_PIXEL_FMT and SPANTMP_PIXEL_TYPE - TODO confirm against includers.
611 #undef INIT_MONO_PIXEL
620 #undef SPANTMP_PIXEL_FMT
621 #undef SPANTMP_PIXEL_TYPE