2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
29 * Template file of span read / write functions.
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
36 #include "main/colormac.h"
37 #include "spantmp_common.h"
/* Default clip loops: when the including file has not supplied a dedicated
 * read or write clip loop, fall back to the generic HW_CLIPLOOP().
 */
43 #ifndef HW_READ_CLIPLOOP
44 #define HW_READ_CLIPLOOP() HW_CLIPLOOP()
47 #ifndef HW_WRITE_CLIPLOOP
48 #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP()
51 #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
/* Pixel accessors for 16-bit 5-6-5 RGB.
 *
 * GET_PTR offsets `buf` by (_x) * 2 + (_y) * pitch; `buf` and `pitch` must be
 * in scope wherever the macros expand.  GET_VALUE / PUT_VALUE go through a
 * volatile GLushort pointer.
 *
 * WRITE_RGBA keeps the top 5/6/5 bits of r/g/b and places them in bits
 * 15-11 / 10-5 / 4-0; `a` is not used in the visible part of the expansion.
 * READ_RGBA extracts the same fields and rescales each one so that the
 * largest field value becomes 255.
 */
54 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
59 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
62 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
63 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
64 #endif /* GET_VALUE */
66 #define INIT_MONO_PIXEL(p, color) \
67 p = PACK_COLOR_565( color[0], color[1], color[2] )
69 #define WRITE_RGBA( _x, _y, r, g, b, a ) \
70 PUT_VALUE(_x, _y, ((((int)r & 0xf8) << 8) | \
71 (((int)g & 0xfc) << 3) | \
72 (((int)b & 0xf8) >> 3))) \
74 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
76 #define READ_RGBA( rgba, _x, _y ) \
78 GLushort p = GET_VALUE(_x, _y); \
79 rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \
80 rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \
81 rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
85 #elif (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5_REV)
/* Pixel accessors for the reversed 16-bit 5-6-5 RGB layout.
 *
 * Addressing matches the other 16-bit formats: (_x) * 2 + (_y) * pitch from
 * `buf`.  Packing is delegated to PACK_COLOR_565_REV (defined elsewhere).
 * READ_RGBA exchanges the two bytes of the fetched value and then decodes
 * it with the 5-6-5 shifts and masks, rescaling each field to 0..255.
 */
88 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV
93 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
96 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
97 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
98 #endif /* GET_VALUE */
100 #define INIT_MONO_PIXEL(p, color) \
101 p = PACK_COLOR_565_REV( color[0], color[1], color[2] )
103 #define WRITE_RGBA( _x, _y, r, g, b, a ) \
104 PUT_VALUE(_x, _y, PACK_COLOR_565_REV( r, g, b ))
106 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
108 #define READ_RGBA( rgba, _x, _y ) \
110 GLushort p = GET_VALUE(_x, _y); \
111 p = p << 8 | p >> 8; \
112 rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \
113 rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \
114 rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
118 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4)
/* Pixel accessors for 16-bit 4-4-4-4 BGRA.
 *
 * Pixels are packed with PACK_COLOR_4444_REV(a, r, g, b).  READ_RGBA takes
 * red from bits 3-0, green from bits 15-12, blue from bits 7-4 and alpha
 * from bits 11-8; multiplying a nibble by 0x11 copies it into both halves
 * of the output byte.
 *
 * NOTE(review): PACK_COLOR_4444_REV is defined outside this file - confirm
 * that the blue/alpha bit positions read here mirror what it packs.
 */
121 ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4
126 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
129 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
130 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
131 #endif /* GET_VALUE */
133 #define INIT_MONO_PIXEL(p, color) \
134 p = PACK_COLOR_4444_REV(color[3], color[0], color[1], color[2])
136 #define WRITE_RGBA( _x, _y, r, g, b, a ) \
137 PUT_VALUE(_x, _y, PACK_COLOR_4444_REV(a, r, g, b)) \
139 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
141 #define READ_RGBA( rgba, _x, _y ) \
143 GLushort p = GET_VALUE(_x, _y); \
144 rgba[0] = ((p >> 0) & 0xf) * 0x11; \
145 rgba[1] = ((p >> 12) & 0xf) * 0x11; \
146 rgba[2] = ((p >> 4) & 0xf) * 0x11; \
147 rgba[3] = ((p >> 8) & 0xf) * 0x11; \
151 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4_REV)
/* Pixel accessors for 16-bit 4-4-4-4 BGRA, reversed component order.
 *
 * Pixels are packed with PACK_COLOR_4444(a, r, g, b).  READ_RGBA takes alpha
 * from bits 15-12, red from bits 11-8, green from bits 7-4 and blue from
 * bits 3-0; multiplying a nibble by 0x11 copies it into both halves of the
 * output byte.
 */
154 ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV
159 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
162 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
163 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
164 #endif /* GET_VALUE */
166 #define INIT_MONO_PIXEL(p, color) \
167 p = PACK_COLOR_4444(color[3], color[0], color[1], color[2])
169 #define WRITE_RGBA( _x, _y, r, g, b, a ) \
170 PUT_VALUE(_x, _y, PACK_COLOR_4444(a, r, g, b)) \
172 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
174 #define READ_RGBA( rgba, _x, _y ) \
176 GLushort p = GET_VALUE(_x, _y); \
177 rgba[0] = ((p >> 8) & 0xf) * 0x11; \
178 rgba[1] = ((p >> 4) & 0xf) * 0x11; \
179 rgba[2] = ((p >> 0) & 0xf) * 0x11; \
180 rgba[3] = ((p >> 12) & 0xf) * 0x11; \
184 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5_REV)
/* Pixel accessors for 16-bit 1-5-5-5 BGRA, reversed component order.
 *
 * Pixels are packed with PACK_COLOR_1555(a, r, g, b).  READ_RGBA takes red
 * from bits 14-10, green from bits 9-5 and blue from bits 4-0, rescaling
 * each 5-bit field so its maximum becomes 255; the single alpha bit
 * (bit 15) expands to 0x00 or 0xff.
 */
187 ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV
192 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
195 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
196 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
197 #endif /* GET_VALUE */
199 #define INIT_MONO_PIXEL(p, color) \
200 p = PACK_COLOR_1555(color[3], color[0], color[1], color[2])
202 #define WRITE_RGBA( _x, _y, r, g, b, a ) \
203 PUT_VALUE(_x, _y, PACK_COLOR_1555(a, r, g, b)) \
205 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
207 #define READ_RGBA( rgba, _x, _y ) \
209 GLushort p = GET_VALUE(_x, _y); \
210 rgba[0] = ((p >> 7) & 0xf8) * 255 / 0xf8; \
211 rgba[1] = ((p >> 2) & 0xf8) * 255 / 0xf8; \
212 rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
213 rgba[3] = ((p >> 15) & 0x1) * 0xff; \
216 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5)
/* Pixel accessors for 16-bit 1-5-5-5 BGRA.
 *
 * Pixels are packed with PACK_COLOR_1555_REV(a, r, g, b).  READ_RGBA first
 * exchanges the two bytes of the fetched value, then decodes it with the
 * same shifts and masks as the 1_5_5_5_REV case: red bits 14-10, green bits
 * 9-5, blue bits 4-0 (each rescaled to 0..255) and alpha from bit 15
 * (0x00 or 0xff).
 */
219 ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5
224 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
227 #define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
228 #define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
229 #endif /* GET_VALUE */
231 #define INIT_MONO_PIXEL(p, color) \
232 p = PACK_COLOR_1555_REV(color[3], color[0], color[1], color[2])
234 #define WRITE_RGBA( _x, _y, r, g, b, a ) \
235 PUT_VALUE(_x, _y, PACK_COLOR_1555_REV(a, r, g, b)) \
237 #define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
239 #define READ_RGBA( rgba, _x, _y ) \
241 GLushort p = GET_VALUE(_x, _y); \
242 p = p << 8 | p >> 8; \
243 rgba[0] = ((p >> 7) & 0xf8) * 255 / 0xf8; \
244 rgba[1] = ((p >> 2) & 0xf8) * 255 / 0xf8; \
245 rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
246 rgba[3] = ((p >> 15) & 0x1) * 0xff; \
249 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/* Pixel accessors for 32-bit 8-8-8-8 BGRA, reversed component order.
 *
 * GET_PTR offsets `buf` by (_x) * 4 + (_y) * pitch and GET_VALUE / PUT_VALUE
 * go through a volatile GLuint pointer.  Mono pixels are packed with
 * PACK_COLOR_8888(alpha, red, green, blue).  WRITE_RGBA starts with red
 * shifted to bit 16; the remaining operands are not visible in this view.
 *
 * Three READ_RGBA variants are provided:
 *  - USE_X86_ASM: byte-swap the word, rotate it right by 8 and store it
 *    into rgba as one 32-bit write.
 *  - MESA_BIG_ENDIAN: store (t >> 24) | (p << 8) as one 32-bit write
 *    (`t` is not declared on any line visible here - presumably a copy of
 *    p; confirm).
 *  - the last definition: per-byte extraction, red from bits 23-16, green
 *    from 15-8, blue from 7-0, alpha from 31-24.
 */
252 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
257 #define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch)
260 #define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
261 #define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
262 #endif /* GET_VALUE */
264 # define INIT_MONO_PIXEL(p, color) \
265 p = PACK_COLOR_8888(color[3], color[0], color[1], color[2])
267 # define WRITE_RGBA(_x, _y, r, g, b, a) \
268 PUT_VALUE(_x, _y, ((r << 16) | \
273 #define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
275 # if defined( USE_X86_ASM )
276 # define READ_RGBA(rgba, _x, _y) \
278 GLuint p = GET_VALUE(_x, _y); \
279 __asm__ __volatile__( "bswap %0; rorl $8, %0" \
280 : "=r" (p) : "0" (p) ); \
281 ((GLuint *)rgba)[0] = p; \
283 # elif defined( MESA_BIG_ENDIAN )
284 /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
285 * rotlwi instruction. It also produces good code on SPARC.
287 # define READ_RGBA( rgba, _x, _y ) \
289 GLuint p = GET_VALUE(_x, _y); \
291 *((uint32_t *) rgba) = (t >> 24) | (p << 8); \
294 # define READ_RGBA( rgba, _x, _y ) \
296 GLuint p = GET_VALUE(_x, _y); \
297 rgba[0] = (p >> 16) & 0xff; \
298 rgba[1] = (p >> 8) & 0xff; \
299 rgba[2] = (p >> 0) & 0xff; \
300 rgba[3] = (p >> 24) & 0xff; \
304 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8)
/* Pixel accessors for 32-bit 8-8-8-8 BGRA.
 *
 * Addressing is (_x) * 4 + (_y) * pitch from `buf`, accessed through a
 * volatile GLuint pointer.  Mono pixels are packed with
 * PACK_COLOR_8888(blue, green, red, alpha).  WRITE_RGBA starts with red
 * shifted to bit 8; the remaining operands are not visible in this view.
 *
 * Three READ_RGBA variants are provided:
 *  - USE_X86_ASM: rotate the word right by 8 and store it into rgba as one
 *    32-bit write.
 *  - MESA_BIG_ENDIAN: pass the fetched word through CPU_TO_LE32, then store
 *    (t >> 24) | (p << 8) as one 32-bit write (`t` is not declared on any
 *    line visible here - presumably a copy of p; confirm).
 *  - the last definition: per-byte extraction, red from bits 15-8, green
 *    from 23-16, blue from 31-24, alpha from 7-0.
 */
307 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8
312 #define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch)
315 #define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
316 #define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
317 #endif /* GET_VALUE */
319 # define INIT_MONO_PIXEL(p, color) \
320 p = PACK_COLOR_8888(color[2], color[1], color[0], color[3])
322 # define WRITE_RGBA(_x, _y, r, g, b, a) \
323 PUT_VALUE(_x, _y, ((r << 8) | \
328 #define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
330 # if defined( USE_X86_ASM )
331 # define READ_RGBA(rgba, _x, _y) \
333 GLuint p = GET_VALUE(_x, _y); \
334 __asm__ __volatile__( "rorl $8, %0" \
335 : "=r" (p) : "0" (p) ); \
336 ((GLuint *)rgba)[0] = p; \
338 # elif defined( MESA_BIG_ENDIAN )
339 /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
340 * rotlwi instruction. It also produces good code on SPARC.
342 # define READ_RGBA( rgba, _x, _y ) \
344 GLuint p = CPU_TO_LE32(GET_VALUE(_x, _y)); \
346 *((uint32_t *) rgba) = (t >> 24) | (p << 8); \
349 # define READ_RGBA( rgba, _x, _y ) \
351 GLuint p = GET_VALUE(_x, _y); \
352 rgba[0] = (p >> 8) & 0xff; \
353 rgba[1] = (p >> 16) & 0xff; \
354 rgba[2] = (p >> 24) & 0xff; \
355 rgba[3] = (p >> 0) & 0xff; \
359 #elif (SPANTMP_PIXEL_FMT == GL_BGR) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/* Pixel accessors for 32-bit pixels whose alpha byte is not meaningful.
 *
 * Addressing is (_x) * 4 + (_y) * pitch from `buf`, accessed through a
 * volatile GLuint pointer.  INIT_MONO_PIXEL ignores color[3] and packs 0xff
 * in the alpha position.  WRITE_RGBA starts with red shifted to bit 16; the
 * remaining operands are not visible in this view.
 *
 * Three READ_RGBA variants are provided:
 *  - USE_X86_ASM: byte-swap, rotate right by 8, then OR in 0xff000000
 *    before the single 32-bit store into rgba.
 *  - MESA_BIG_ENDIAN: store (p << 8) | 0xff as one 32-bit write.
 *  - the last definition: red from bits 23-16, green from 15-8, blue from
 *    7-0 (the alpha assignment is not visible in this view).
 */
362 ** GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV
364 ** This is really for MESA_FORMAT_XRGB8888. The spantmp code needs to be
365 ** kicked to the curb, and we need to just code-gen this.
370 #define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch)
373 #define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
374 #define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
375 #endif /* GET_VALUE */
377 # define INIT_MONO_PIXEL(p, color) \
378 p = PACK_COLOR_8888(0xff, color[0], color[1], color[2])
380 # define WRITE_RGBA(_x, _y, r, g, b, a) \
381 PUT_VALUE(_x, _y, ((r << 16) | \
386 #define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
388 # if defined( USE_X86_ASM )
389 # define READ_RGBA(rgba, _x, _y) \
391 GLuint p = GET_VALUE(_x, _y); \
392 __asm__ __volatile__( "bswap %0; rorl $8, %0" \
393 : "=r" (p) : "0" (p) ); \
394 ((GLuint *)rgba)[0] = p | 0xff000000; \
396 # elif defined( MESA_BIG_ENDIAN )
397 /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
398 * rotlwi instruction. It also produces good code on SPARC.
400 # define READ_RGBA( rgba, _x, _y ) \
402 GLuint p = GET_VALUE(_x, _y); \
403 *((uint32_t *) rgba) = (p << 8) | 0xff; \
406 # define READ_RGBA( rgba, _x, _y ) \
408 GLuint p = GET_VALUE(_x, _y); \
409 rgba[0] = (p >> 16) & 0xff; \
410 rgba[1] = (p >> 8) & 0xff; \
411 rgba[2] = (p >> 0) & 0xff; \
416 #elif (SPANTMP_PIXEL_FMT == GL_ALPHA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_BYTE)
/* Pixel accessors for an 8-bit alpha-only buffer.
 *
 * GET_PTR offsets `buf` by (_x) + (_y) * pitch and GET_VALUE / PUT_VALUE go
 * through a volatile GLubyte pointer.  WRITE_RGBA stores only `a`; the
 * `r & 0` term contributes nothing to the value and exists solely to
 * reference `r`.  READ_RGBA fetches one GLubyte; how it is spread over
 * rgba[] is not visible in this view, and neither is the body of
 * INIT_MONO_PIXEL.
 *
 * The trailing #error reports an unsupported format/type combination
 * (presumably the final #else of this chain - the #else line itself is not
 * visible here).
 */
419 ** GL_ALPHA, GL_UNSIGNED_BYTE
424 #define GET_PTR(_x, _y) ( buf + (_x) + (_y) * pitch)
427 #define GET_VALUE(_x, _y) *(volatile GLubyte *)(GET_PTR(_x, _y))
428 #define PUT_VALUE(_x, _y, _v) *(volatile GLubyte *)(GET_PTR(_x, _y)) = (_v)
429 #endif /* GET_VALUE */
431 # define INIT_MONO_PIXEL(p, color) \
434 # define WRITE_RGBA(_x, _y, r, g, b, a) \
435 PUT_VALUE(_x, _y, a | (r & 0 /* quiet warnings */))
437 #define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
439 #define READ_RGBA( rgba, _x, _y ) \
441 GLubyte p = GET_VALUE(_x, _y); \
449 #error SPANTMP_PIXEL_FMT must be set to a valid value!
455 ** Assembly routines.
458 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
459 #include "x86/read_rgba_span_x86.h"
460 #include "x86/common_x86_asm.h"
/**
 * Write a horizontal span of RGBA pixels (installed as rb->PutRow by
 * InitPointers).
 *
 * \param n       number of pixels in the span
 * \param x, y    position handed to CLIPSPAN together with n
 * \param values  source colors, viewed as an array of 4-byte RGBA tuples
 * \param mask    per-pixel mask array
 *
 * CLIPSPAN receives (x, y, n) plus the locals (x1, n1, i).  Each loop then
 * runs n1 times, stepping the destination column x1 and the source index i
 * together.  When DBG is set the values of i, n1 and x1 are traced to
 * stderr.
 *
 * NOTE(review): two loops with identical headers are visible and the call
 * consuming rgba[i][0..3] is not; presumably both invoke WRITE_RGBA and one
 * of them is the masked path - confirm against the complete file.
 */
463 static void TAG(WriteRGBASpan
)( GLcontext
*ctx
,
464 struct gl_renderbuffer
*rb
,
465 GLuint n
, GLint x
, GLint y
,
466 const void *values
, const GLubyte mask
[] )
470 const GLubyte (*rgba
)[4] = (const GLubyte (*)[4]) values
;
480 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
482 if (DBG
) fprintf(stderr
, "WriteRGBASpan %d..%d (x1 %d)\n",
483 (int)i
, (int)n1
, (int)x1
);
487 for (;n1
>0;i
++,x1
++,n1
--)
490 rgba
[i
][0], rgba
[i
][1],
491 rgba
[i
][2], rgba
[i
][3] );
495 for (;n1
>0;i
++,x1
++,n1
--)
497 rgba
[i
][0], rgba
[i
][1],
498 rgba
[i
][2], rgba
[i
][3] );
/**
 * Write a horizontal span of RGB pixels (installed as rb->PutRowRGB by
 * InitPointers).
 *
 * \param n       number of pixels in the span
 * \param x, y    position handed to CLIPSPAN together with n
 * \param values  source colors, viewed as an array of 3-byte RGB tuples
 * \param mask    per-pixel mask array
 *
 * Each pixel is written with WRITE_RGBA using the three source components
 * and a constant alpha of 255.  CLIPSPAN receives (x, y, n) plus the locals
 * (x1, n1, i); the loops then advance x1 and i together for n1 pixels.
 * When DBG is set the values of i, n1 and x1 are traced to stderr.
 *
 * NOTE(review): the two loops look identical in this view; presumably one
 * of them is the masked path - confirm against the complete file.
 */
506 static void TAG(WriteRGBSpan
)( GLcontext
*ctx
,
507 struct gl_renderbuffer
*rb
,
508 GLuint n
, GLint x
, GLint y
,
509 const void *values
, const GLubyte mask
[] )
513 const GLubyte (*rgb
)[3] = (const GLubyte (*)[3]) values
;
523 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
525 if (DBG
) fprintf(stderr
, "WriteRGBSpan %d..%d (x1 %d)\n",
526 (int)i
, (int)n1
, (int)x1
);
530 for (;n1
>0;i
++,x1
++,n1
--)
532 WRITE_RGBA( x1
, y
, rgb
[i
][0], rgb
[i
][1], rgb
[i
][2], 255 );
536 for (;n1
>0;i
++,x1
++,n1
--)
537 WRITE_RGBA( x1
, y
, rgb
[i
][0], rgb
[i
][1], rgb
[i
][2], 255 );
/**
 * Write RGBA colors to scattered pixels (installed as rb->PutValues by
 * InitPointers).
 *
 * \param n       number of pixels
 * \param x, y    per-pixel coordinate arrays
 * \param values  source colors, viewed as an array of 4-byte RGBA tuples
 * \param mask    per-pixel mask array
 *
 * For every pixel the y coordinate is first passed through Y_FLIP; the
 * pixel is written with WRITE_RGBA only when CLIPPIXEL accepts (x[i], fy).
 *
 * NOTE(review): the same flip/clip/write sequence appears twice; presumably
 * one copy is guarded by mask[i] - the enclosing loops and conditions are
 * not visible here, so confirm against the complete file.
 */
545 static void TAG(WriteRGBAPixels
)( GLcontext
*ctx
,
546 struct gl_renderbuffer
*rb
,
547 GLuint n
, const GLint x
[], const GLint y
[],
548 const void *values
, const GLubyte mask
[] )
552 const GLubyte (*rgba
)[4] = (const GLubyte (*)[4]) values
;
556 if (DBG
) fprintf(stderr
, "WriteRGBAPixels\n");
565 const int fy
= Y_FLIP(y
[i
]);
566 if (CLIPPIXEL(x
[i
],fy
))
567 WRITE_RGBA( x
[i
], fy
,
568 rgba
[i
][0], rgba
[i
][1],
569 rgba
[i
][2], rgba
[i
][3] );
577 const int fy
= Y_FLIP(y
[i
]);
578 if (CLIPPIXEL(x
[i
],fy
))
579 WRITE_RGBA( x
[i
], fy
,
580 rgba
[i
][0], rgba
[i
][1],
581 rgba
[i
][2], rgba
[i
][3] );
/**
 * Fill a horizontal span with one color (installed as rb->PutMonoRow by
 * InitPointers).
 *
 * \param n      number of pixels in the span
 * \param x, y   position handed to CLIPSPAN together with n
 * \param value  the color, viewed as an array of GLubyte components
 * \param mask   per-pixel mask array
 *
 * The color is converted to a packed pixel `p` once, via INIT_MONO_PIXEL,
 * before the span is walked; every pixel is then stored with WRITE_PIXEL.
 *
 * NOTE(review): the two loops look identical in this view; presumably one
 * of them is the masked path - confirm against the complete file.
 */
591 static void TAG(WriteMonoRGBASpan
)( GLcontext
*ctx
,
592 struct gl_renderbuffer
*rb
,
593 GLuint n
, GLint x
, GLint y
,
594 const void *value
, const GLubyte mask
[] )
598 const GLubyte
*color
= (const GLubyte
*) value
;
602 INIT_MONO_PIXEL(p
, color
);
606 if (DBG
) fprintf(stderr
, "WriteMonoRGBASpan\n");
611 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
614 for (;n1
>0;i
++,x1
++,n1
--)
616 WRITE_PIXEL( x1
, y
, p
);
620 for (;n1
>0;i
++,x1
++,n1
--)
621 WRITE_PIXEL( x1
, y
, p
);
/**
 * Write one color to scattered pixels (installed as rb->PutMonoValues by
 * InitPointers).
 *
 * \param x, y  per-pixel coordinate arrays
 * \param mask  per-pixel mask array
 *
 * The body also reads `value` (cast to a GLubyte array named color); its
 * declaration, like that of the pixel count, is on a parameter line not
 * visible in this view.
 *
 * The color is packed once into `p` with INIT_MONO_PIXEL.  For every pixel
 * the y coordinate is passed through Y_FLIP and the pixel is stored with
 * WRITE_PIXEL only when CLIPPIXEL accepts (x[i], fy).
 *
 * NOTE(review): the flip/clip/write sequence appears twice; presumably one
 * copy is guarded by mask[i] - confirm against the complete file.
 */
630 static void TAG(WriteMonoRGBAPixels
)( GLcontext
*ctx
,
631 struct gl_renderbuffer
*rb
,
633 const GLint x
[], const GLint y
[],
635 const GLubyte mask
[] )
639 const GLubyte
*color
= (const GLubyte
*) value
;
642 INIT_MONO_PIXEL(p
, color
);
644 if (DBG
) fprintf(stderr
, "WriteMonoRGBAPixels\n");
652 int fy
= Y_FLIP(y
[i
]);
653 if (CLIPPIXEL( x
[i
], fy
))
654 WRITE_PIXEL( x
[i
], fy
, p
);
660 int fy
= Y_FLIP(y
[i
]);
661 if (CLIPPIXEL( x
[i
], fy
))
662 WRITE_PIXEL( x
[i
], fy
, p
);
/**
 * Read a horizontal span of pixels into RGBA bytes; this is the plain C
 * reader that InitPointers installs as rb->GetRow.
 *
 * \param n       number of pixels in the span
 * \param x, y    position handed to CLIPSPAN together with n
 * \param values  destination, viewed as an array of 4-byte RGBA tuples
 *
 * CLIPSPAN receives (x, y, n) plus the locals (x1, n1, i); the loop then
 * decodes n1 pixels with READ_RGBA, advancing the source column x1 and the
 * destination index i together.
 */
672 static void TAG(ReadRGBASpan
)( GLcontext
*ctx
,
673 struct gl_renderbuffer
*rb
,
674 GLuint n
, GLint x
, GLint y
, void *values
)
678 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
684 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
689 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
690 for (;n1
>0;i
++,x1
++,n1
--)
691 READ_RGBA( rgba
[i
], x1
, y
);
/**
 * MMX variant of the span reader.
 *
 * Compiled only when GET_PTR is defined, USE_MMX_ASM is defined, and the
 * format is either GL_BGRA / GL_UNSIGNED_INT_8_8_8_8_REV or
 * GL_RGB / GL_UNSIGNED_SHORT_5_6_5.
 *
 * After CLIPSPAN, the address of the first pixel is obtained with
 * GET_PTR(x1, y) and the whole run of n1 pixels is handed to the matching
 * _generic_read_RGBA_span_*_MMX routine, writing into rgba[i].  Unless
 * USE_INNER_EMMS is defined, an "emms" instruction is issued both near the
 * start and near the end of the function.
 */
699 #if defined(GET_PTR) && \
700 defined(USE_MMX_ASM) && \
701 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
702 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
703 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
704 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
705 static void TAG2(ReadRGBASpan
,_MMX
)( GLcontext
*ctx
,
706 struct gl_renderbuffer
*rb
,
707 GLuint n
, GLint x
, GLint y
, void *values
)
709 #ifndef USE_INNER_EMMS
710 /* The EMMS instruction is directly in-lined here because using GCC's
711 * built-in _mm_empty function was found to utterly destroy performance.
713 __asm__
__volatile__( "emms" );
718 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
724 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
729 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
732 const void * src
= GET_PTR( x1
, y
);
733 #if (SPANTMP_PIXEL_FMT == GL_RGB) && \
734 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
735 _generic_read_RGBA_span_RGB565_MMX( src
, rgba
[i
], n1
);
737 _generic_read_RGBA_span_BGRA8888_REV_MMX( src
, rgba
[i
], n1
);
744 #ifndef USE_INNER_EMMS
745 __asm__
__volatile__( "emms" );
/**
 * SSE2 variant of the span reader.
 *
 * Compiled only when GET_PTR and USE_SSE_ASM are defined and the format is
 * GL_BGRA / GL_UNSIGNED_INT_8_8_8_8_REV.
 *
 * After CLIPSPAN, the address of the first pixel is obtained with
 * GET_PTR(x1, y) and the run of n1 pixels is converted by
 * _generic_read_RGBA_span_BGRA8888_REV_SSE2 into rgba[i].  The body reads
 * `values`, whose parameter line is not visible in this view.
 */
751 #if defined(GET_PTR) && \
752 defined(USE_SSE_ASM) && \
753 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
754 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
755 static void TAG2(ReadRGBASpan
,_SSE2
)( GLcontext
*ctx
,
756 struct gl_renderbuffer
*rb
,
757 GLuint n
, GLint x
, GLint y
,
762 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
768 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
773 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
776 const void * src
= GET_PTR( x1
, y
);
777 _generic_read_RGBA_span_BGRA8888_REV_SSE2( src
, rgba
[i
], n1
);
/**
 * SSE variant of the span reader.
 *
 * Compiled only when GET_PTR and USE_SSE_ASM are defined and the format is
 * GL_BGRA / GL_UNSIGNED_INT_8_8_8_8_REV.
 *
 * After CLIPSPAN, the address of the first pixel is obtained with
 * GET_PTR(x1, y) and the run of n1 pixels is converted by
 * _generic_read_RGBA_span_BGRA8888_REV_SSE into rgba[i].  Unless
 * USE_INNER_EMMS is defined, an "emms" instruction is issued both near the
 * start and near the end of the function.  The body reads `values`, whose
 * parameter line is not visible in this view.
 */
786 #if defined(GET_PTR) && \
787 defined(USE_SSE_ASM) && \
788 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
789 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
790 static void TAG2(ReadRGBASpan
,_SSE
)( GLcontext
*ctx
,
791 struct gl_renderbuffer
*rb
,
792 GLuint n
, GLint x
, GLint y
,
795 #ifndef USE_INNER_EMMS
796 /* The EMMS instruction is directly in-lined here because using GCC's
797 * built-in _mm_empty function was found to utterly destroy performance.
799 __asm__
__volatile__( "emms" );
804 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
810 if (DBG
) fprintf(stderr
, "ReadRGBASpan\n");
815 CLIPSPAN(x
,y
,n
,x1
,n1
,i
);
818 const void * src
= GET_PTR( x1
, y
);
819 _generic_read_RGBA_span_BGRA8888_REV_SSE( src
, rgba
[i
], n1
);
825 #ifndef USE_INNER_EMMS
826 __asm__
__volatile__( "emms" );
/**
 * Read scattered pixels into RGBA bytes (installed as rb->GetValues by
 * InitPointers).
 *
 * \param n     number of pixels
 * \param x, y  per-pixel coordinate arrays
 *
 * The body writes through `values` (viewed as an array of 4-byte RGBA
 * tuples); its parameter line is not visible in this view.
 *
 * For every pixel the y coordinate is passed through Y_FLIP, and the pixel
 * is decoded with READ_RGBA into rgba[i] only when CLIPPIXEL accepts
 * (x[i], fy).
 */
832 static void TAG(ReadRGBAPixels
)( GLcontext
*ctx
,
833 struct gl_renderbuffer
*rb
,
834 GLuint n
, const GLint x
[], const GLint y
[],
839 GLubyte (*rgba
)[4] = (GLubyte (*)[4]) values
;
843 if (DBG
) fprintf(stderr
, "ReadRGBAPixels\n");
848 int fy
= Y_FLIP( y
[i
] );
849 if (CLIPPIXEL( x
[i
], fy
))
850 READ_RGBA( rgba
[i
], x
[i
], fy
);
/**
 * Install this template instance's span functions into a renderbuffer.
 *
 * \param rb  renderbuffer whose function-pointer slots are filled in
 *
 * PutRow, PutRowRGB, PutMonoRow, PutValues, PutMonoValues and GetValues are
 * always set to the functions generated above.  GetRow is set to one of the
 * SSE2, SSE, MMX or plain C span readers; the accelerated candidates are
 * only compiled in for the format/type and USE_*_ASM combinations tested by
 * the #if blocks below.  The SSE2 reader is additionally gated on
 * cpu_has_xmm2 at run time; the run-time conditions guarding the SSE and
 * MMX choices are not visible in this view.  When DBG is set the selected
 * variant is reported on stderr.
 */
858 static void TAG(InitPointers
)(struct gl_renderbuffer
*rb
)
860 rb
->PutRow
= TAG(WriteRGBASpan
);
861 rb
->PutRowRGB
= TAG(WriteRGBSpan
);
862 rb
->PutMonoRow
= TAG(WriteMonoRGBASpan
);
863 rb
->PutValues
= TAG(WriteRGBAPixels
);
864 rb
->PutMonoValues
= TAG(WriteMonoRGBAPixels
);
865 rb
->GetValues
= TAG(ReadRGBAPixels
);
868 #if defined(USE_SSE_ASM) && \
869 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
870 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
871 if ( cpu_has_xmm2
) {
872 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "SSE2" );
873 rb
->GetRow
= TAG2(ReadRGBASpan
, _SSE2
);
877 #if defined(USE_SSE_ASM) && \
878 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
879 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
881 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "SSE" );
882 rb
->GetRow
= TAG2(ReadRGBASpan
, _SSE
);
886 #if defined(USE_MMX_ASM) && \
887 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
888 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
889 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
890 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
892 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "MMX" );
893 rb
->GetRow
= TAG2(ReadRGBASpan
, _MMX
);
899 if (DBG
) fprintf( stderr
, "Using %s version of GetRow\n", "C" );
900 rb
->GetRow
= TAG(ReadRGBASpan
);
/* Template teardown: drop the mono-pixel macro and the two format
 * parameters defined by the includer (presumably so this file can be
 * included again with a different format - confirm against the includers).
 */
906 #undef INIT_MONO_PIXEL
915 #undef SPANTMP_PIXEL_FMT
916 #undef SPANTMP_PIXEL_TYPE