6030ca18a7e302c36f693ad5531fdeaaf003d217
[mesa.git] / src / mesa / drivers / dri / common / spantmp2.h
1 /*
2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file spantmp2.h
28 *
29 * Template file of span read / write functions.
30 *
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
34 */
35
36 #include "main/colormac.h"
37 #include "spantmp_common.h"
38
/* Debug tracing defaults to off; the including file may pre-define DBG. */
#if !defined(DBG)
#  define DBG 0
#endif

/*
 * Reads and writes both fall back to the generic HW_CLIPLOOP() unless the
 * including file supplies a dedicated variant beforehand.
 */
#if !defined(HW_READ_CLIPLOOP)
#  define HW_READ_CLIPLOOP() HW_CLIPLOOP()
#endif

#if !defined(HW_WRITE_CLIPLOOP)
#  define HW_WRITE_CLIPLOOP() HW_CLIPLOOP()
#endif
50
51 #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
52
53 /**
54 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
55 **/
56
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 16-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLushort *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLushort *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/*
 * Pack 8-bit r/g/b into a 5-6-5 word (red in the top five bits) and store
 * it.  The alpha argument is ignored.  Each argument is parenthesized before
 * the cast so that expression arguments are masked as a whole.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, ((((int)(r) & 0xf8) << 8) | \
                      (((int)(g) & 0xfc) << 3) | \
                      (((int)(b) & 0xf8) >> 3)))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

/*
 * Load a 5-6-5 pixel and expand every channel to the 0..255 range; alpha is
 * always reported as 0xff.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = GET_VALUE(_x, _y); \
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
      (rgba)[3] = 0xff; \
   } while (0)
81
82 #elif (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5_REV)
83
84 /**
85 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV
86 **/
87
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 16-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLushort *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLushort *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/* Pack with PACK_COLOR_565_REV (defined elsewhere) and store; alpha unused. */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, PACK_COLOR_565_REV( r, g, b ))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

/*
 * Load a pixel, swap its two bytes, then decode it as ordinary 5-6-5.
 * Alpha is always reported as 0xff.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = GET_VALUE(_x, _y); \
      p = p << 8 | p >> 8; \
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
      (rgba)[3] = 0xff; \
   } while (0)
111
112 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4)
113
114 /**
115 ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4
116 **/
117
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 16-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLushort *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLushort *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/* Pack with PACK_COLOR_4444_REV (defined elsewhere) and store. */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, PACK_COLOR_4444_REV(a, r, g, b))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

/*
 * Load a pixel and widen each 4-bit channel to 8 bits (x * 0x11).
 *
 * The nibble positions mirror what WRITE_RGBA stores: ARGB4444 with its two
 * bytes swapped, i.e. G[15:12] B[11:8] A[7:4] R[3:0].  Blue is therefore
 * taken from bits 11:8 and alpha from bits 7:4.
 * NOTE(review): this relies on PACK_COLOR_4444_REV being the byte-swapped
 * counterpart of PACK_COLOR_4444 - confirm against main/colormac.h.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = GET_VALUE(_x, _y); \
      (rgba)[0] = ((p >>  0) & 0xf) * 0x11; \
      (rgba)[1] = ((p >> 12) & 0xf) * 0x11; \
      (rgba)[2] = ((p >>  8) & 0xf) * 0x11; \
      (rgba)[3] = ((p >>  4) & 0xf) * 0x11; \
   } while (0)
140
141
142 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4_REV)
143
144 /**
145 ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV
146 **/
147
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 16-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLushort *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLushort *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/* Pack with PACK_COLOR_4444 (defined elsewhere) and store. */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, PACK_COLOR_4444(a, r, g, b))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

/*
 * Load a pixel laid out as A[15:12] R[11:8] G[7:4] B[3:0] and widen each
 * 4-bit channel to 8 bits (x * 0x11).
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = GET_VALUE(_x, _y); \
      (rgba)[0] = ((p >>  8) & 0xf) * 0x11; \
      (rgba)[1] = ((p >>  4) & 0xf) * 0x11; \
      (rgba)[2] = ((p >>  0) & 0xf) * 0x11; \
      (rgba)[3] = ((p >> 12) & 0xf) * 0x11; \
   } while (0)
170
171
172 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5_REV)
173
174 /**
175 ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV
176 **/
177
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 16-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLushort *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLushort *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/* Pack with PACK_COLOR_1555 (defined elsewhere) and store. */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, PACK_COLOR_1555(a, r, g, b))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

/*
 * Load a pixel laid out as A[15] R[14:10] G[9:5] B[4:0].  Colour channels
 * are rescaled to 0..255; the single alpha bit becomes 0x00 or 0xff.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = GET_VALUE(_x, _y); \
      (rgba)[0] = ((p >> 7) & 0xf8) * 255 / 0xf8; \
      (rgba)[1] = ((p >> 2) & 0xf8) * 255 / 0xf8; \
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
      (rgba)[3] = ((p >> 15) & 0x1) * 0xff; \
   } while (0)
200
201 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5)
202
203 /**
204 ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5
205 **/
206
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 16-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLushort *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLushort *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/* Pack with PACK_COLOR_1555_REV (defined elsewhere) and store. */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   PUT_VALUE(_x, _y, PACK_COLOR_1555_REV(a, r, g, b))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

/*
 * Load a pixel, swap its two bytes, then decode it as
 * A[15] R[14:10] G[9:5] B[4:0].  Colour channels are rescaled to 0..255;
 * the single alpha bit becomes 0x00 or 0xff.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = GET_VALUE(_x, _y); \
      p = p << 8 | p >> 8; \
      (rgba)[0] = ((p >> 7) & 0xf8) * 255 / 0xf8; \
      (rgba)[1] = ((p >> 2) & 0xf8) * 255 / 0xf8; \
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
      (rgba)[3] = ((p >> 15) & 0x1) * 0xff; \
   } while (0)
230
231 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
232
233 /**
234 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
235 **/
236
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 32-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 4 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLuint *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLuint *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/*
 * Pack as A[31:24] R[23:16] G[15:8] B[7:0] and store.  Channels are widened
 * to GLuint before shifting: a GLubyte promotes to signed int, and shifting
 * a value >= 0x80 left by 24 would overflow into the sign bit.
 */
#define WRITE_RGBA(_x, _y, r, g, b, a) \
   PUT_VALUE(_x, _y, (((GLuint)(r) << 16) | \
                      ((GLuint)(g) <<  8) | \
                      ((GLuint)(b) <<  0) | \
                      ((GLuint)(a) << 24)))

/* Store an already packed pixel value. */
#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)

#if defined( USE_X86_ASM )
/* bswap followed by an 8-bit rotate leaves the bytes in R,G,B,A order. */
#define READ_RGBA(rgba, _x, _y) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      __asm__ __volatile__( "bswap %0; rorl $8, %0" \
                            : "=r" (p) : "0" (p) ); \
      ((GLuint *)(rgba))[0] = p; \
   } while (0)
#elif defined( MESA_BIG_ENDIAN )
/* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
 * rotlwi instruction.  It also produces good code on SPARC.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      GLuint t = p; \
      *((uint32_t *) (rgba)) = (t >> 24) | (p << 8); \
   } while (0)
#else
/* Portable path: pick each channel out with shifts and masks. */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      (rgba)[0] = (p >> 16) & 0xff; \
      (rgba)[1] = (p >>  8) & 0xff; \
      (rgba)[2] = (p >>  0) & 0xff; \
      (rgba)[3] = (p >> 24) & 0xff; \
   } while (0)
#endif
282
283 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8)
284
285 /**
286 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8
287 **/
288
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 32-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 4 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLuint *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLuint *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/*
 * Pack as B[31:24] G[23:16] R[15:8] A[7:0] and store.  Channels are widened
 * to GLuint before shifting: a GLubyte promotes to signed int, and shifting
 * a value >= 0x80 left by 24 would overflow into the sign bit.
 */
#define WRITE_RGBA(_x, _y, r, g, b, a) \
   PUT_VALUE(_x, _y, (((GLuint)(r) <<  8) | \
                      ((GLuint)(g) << 16) | \
                      ((GLuint)(b) << 24) | \
                      ((GLuint)(a) <<  0)))

/* Store an already packed pixel value. */
#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)

#if defined( USE_X86_ASM )
/* An 8-bit rotate right leaves the bytes in R,G,B,A order. */
#define READ_RGBA(rgba, _x, _y) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      __asm__ __volatile__( "rorl $8, %0" \
                            : "=r" (p) : "0" (p) ); \
      ((GLuint *)(rgba))[0] = p; \
   } while (0)
#elif defined( MESA_BIG_ENDIAN )
/* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
 * rotlwi instruction.  It also produces good code on SPARC.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = CPU_TO_LE32(GET_VALUE(_x, _y)); \
      GLuint t = p; \
      *((uint32_t *) (rgba)) = (t >> 24) | (p << 8); \
   } while (0)
#else
/* Portable path: pick each channel out with shifts and masks. */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      (rgba)[0] = (p >>  8) & 0xff; \
      (rgba)[1] = (p >> 16) & 0xff; \
      (rgba)[2] = (p >> 24) & 0xff; \
      (rgba)[3] = (p >>  0) & 0xff; \
   } while (0)
#endif
334
335 #elif (SPANTMP_PIXEL_FMT == GL_BGR) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
336
337 /**
338 ** GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV
339 **
340 ** This is really for MESA_FORMAT_XRGB8888. The spantmp code needs to be
341 ** kicked to the curb, and we need to just code-gen this.
342 **/
343
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 32-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) * 4 + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLuint *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLuint *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/*
 * Pack as X[31:24] R[23:16] G[15:8] B[7:0] and store.  The alpha argument is
 * ignored and the top byte is always written as 0xff.  All arithmetic is
 * done in GLuint so nothing is shifted into the sign bit of an int.
 */
#define WRITE_RGBA(_x, _y, r, g, b, a) \
   PUT_VALUE(_x, _y, (((GLuint)(r) << 16) | \
                      ((GLuint)(g) <<  8) | \
                      ((GLuint)(b) <<  0) | \
                      ((GLuint)0xff << 24)))

/* Store an already packed pixel value. */
#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)

#if defined( USE_X86_ASM )
/* Reorder the bytes to R,G,B,x and then force the alpha byte to 0xff. */
#define READ_RGBA(rgba, _x, _y) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      __asm__ __volatile__( "bswap %0; rorl $8, %0" \
                            : "=r" (p) : "0" (p) ); \
      ((GLuint *)(rgba))[0] = p | 0xff000000; \
   } while (0)
#elif defined( MESA_BIG_ENDIAN )
/* Shift the unused top byte out and fill the alpha slot with 0xff. */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      *((uint32_t *) (rgba)) = (p << 8) | 0xff; \
   } while (0)
#else
/* Portable path: pick each channel out with shifts; alpha is constant. */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = GET_VALUE(_x, _y); \
      (rgba)[0] = (p >> 16) & 0xff; \
      (rgba)[1] = (p >>  8) & 0xff; \
      (rgba)[2] = (p >>  0) & 0xff; \
      (rgba)[3] = 0xff; \
   } while (0)
#endif
388
389 #elif (SPANTMP_PIXEL_FMT == GL_ALPHA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_BYTE)
390
391 /**
392 ** GL_ALPHA, GL_UNSIGNED_BYTE
393 **/
394
#ifndef GET_VALUE
#ifndef GET_PTR
/*
 * Address of the 8-bit pixel at (_x, _y).  `buf` and `pitch` are not
 * defined in this file.
 * NOTE(review): presumably a byte pointer and a byte stride supplied by the
 * includer's LOCAL_VARS - confirm.
 */
#define GET_PTR(_x, _y) (buf + (_x) + (_y) * pitch)
#endif

/* Fully parenthesized so the accessors compose safely inside expressions. */
#define GET_VALUE(_x, _y) (*(volatile GLubyte *)(GET_PTR(_x, _y)))
#define PUT_VALUE(_x, _y, _v) (*(volatile GLubyte *)(GET_PTR(_x, _y)) = (_v))
#endif /* GET_VALUE */

/*
 * Store only the alpha channel.  `r` is referenced (and masked to zero)
 * purely to quiet unused-value warnings; both arguments are parenthesized
 * so that expression arguments cannot leak bits into the stored value.
 */
#define WRITE_RGBA(_x, _y, r, g, b, a) \
   PUT_VALUE(_x, _y, (a) | ((r) & 0 /* quiet warnings */))

/* Store an already packed pixel value. */
#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)

/* Load the alpha byte; red, green and blue are reported as zero. */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLubyte p = GET_VALUE(_x, _y); \
      (rgba)[0] = 0; \
      (rgba)[1] = 0; \
      (rgba)[2] = 0; \
      (rgba)[3] = p; \
   } while (0)
417
418 #else
419 #error SPANTMP_PIXEL_FMT must be set to a valid value!
420 #endif
421
422
423
424 /**
425 ** Assembly routines.
426 **/
427
428 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
429 #include "x86/read_rgba_span_x86.h"
430 #include "x86/common_x86_asm.h"
431 #endif
432
433 static void TAG(WriteRGBASpan)( struct gl_context *ctx,
434 struct gl_renderbuffer *rb,
435 GLuint n, GLint x, GLint y,
436 const void *values, const GLubyte mask[] )
437 {
438 (void) ctx;
439
440 HW_WRITE_LOCK()
441 {
442 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
443 GLint x1;
444 GLint n1;
445 LOCAL_VARS;
446
447 y = Y_FLIP(y);
448
449 HW_WRITE_CLIPLOOP()
450 {
451 GLint i = 0;
452 CLIPSPAN(x,y,n,x1,n1,i);
453
454 if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
455 (int)i, (int)n1, (int)x1);
456
457 if (mask)
458 {
459 for (;n1>0;i++,x1++,n1--)
460 if (mask[i])
461 WRITE_RGBA( x1, y,
462 rgba[i][0], rgba[i][1],
463 rgba[i][2], rgba[i][3] );
464 }
465 else
466 {
467 for (;n1>0;i++,x1++,n1--)
468 WRITE_RGBA( x1, y,
469 rgba[i][0], rgba[i][1],
470 rgba[i][2], rgba[i][3] );
471 }
472 }
473 HW_ENDCLIPLOOP();
474 }
475 HW_WRITE_UNLOCK();
476 }
477
478 static void TAG(WriteRGBSpan)( struct gl_context *ctx,
479 struct gl_renderbuffer *rb,
480 GLuint n, GLint x, GLint y,
481 const void *values, const GLubyte mask[] )
482 {
483 (void) ctx;
484
485 HW_WRITE_LOCK()
486 {
487 const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
488 GLint x1;
489 GLint n1;
490 LOCAL_VARS;
491
492 y = Y_FLIP(y);
493
494 HW_WRITE_CLIPLOOP()
495 {
496 GLint i = 0;
497 CLIPSPAN(x,y,n,x1,n1,i);
498
499 if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
500 (int)i, (int)n1, (int)x1);
501
502 if (mask)
503 {
504 for (;n1>0;i++,x1++,n1--)
505 if (mask[i])
506 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
507 }
508 else
509 {
510 for (;n1>0;i++,x1++,n1--)
511 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
512 }
513 }
514 HW_ENDCLIPLOOP();
515 }
516 HW_WRITE_UNLOCK();
517 }
518
519 static void TAG(WriteRGBAPixels)( struct gl_context *ctx,
520 struct gl_renderbuffer *rb,
521 GLuint n, const GLint x[], const GLint y[],
522 const void *values, const GLubyte mask[] )
523 {
524 (void) ctx;
525
526 HW_WRITE_LOCK()
527 {
528 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
529 GLint i;
530 LOCAL_VARS;
531
532 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
533
534 HW_WRITE_CLIPLOOP()
535 {
536 if (mask)
537 {
538 for (i=0;i<n;i++)
539 {
540 if (mask[i]) {
541 const int fy = Y_FLIP(y[i]);
542 if (CLIPPIXEL(x[i],fy))
543 WRITE_RGBA( x[i], fy,
544 rgba[i][0], rgba[i][1],
545 rgba[i][2], rgba[i][3] );
546 }
547 }
548 }
549 else
550 {
551 for (i=0;i<n;i++)
552 {
553 const int fy = Y_FLIP(y[i]);
554 if (CLIPPIXEL(x[i],fy))
555 WRITE_RGBA( x[i], fy,
556 rgba[i][0], rgba[i][1],
557 rgba[i][2], rgba[i][3] );
558 }
559 }
560 }
561 HW_ENDCLIPLOOP();
562 }
563 HW_WRITE_UNLOCK();
564 }
565
566
567 static void TAG(ReadRGBASpan)( struct gl_context *ctx,
568 struct gl_renderbuffer *rb,
569 GLuint n, GLint x, GLint y, void *values)
570 {
571 (void) ctx;
572
573 HW_READ_LOCK()
574 {
575 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
576 GLint x1,n1;
577 LOCAL_VARS;
578
579 y = Y_FLIP(y);
580
581 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
582
583 HW_READ_CLIPLOOP()
584 {
585 GLint i = 0;
586 CLIPSPAN(x,y,n,x1,n1,i);
587 for (;n1>0;i++,x1++,n1--)
588 READ_RGBA( rgba[i], x1, y );
589 }
590 HW_ENDCLIPLOOP();
591 }
592 HW_READ_UNLOCK();
593 }
594
595
#if defined(GET_PTR) && \
   defined(USE_MMX_ASM) && \
   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
/**
 * MMX variant of the RGBA span reader.
 *
 * Only compiled for the BGRA/8_8_8_8_REV and RGB/5_6_5 layouts.  After
 * clipping, each pass hands the source address from GET_PTR(), the
 * destination rgba[i] and the clipped count n1 to the matching
 * _generic_read_RGBA_span_*_MMX routine.
 *
 * \param ctx     unused.
 * \param rb      not referenced directly in this function
 *                (NOTE(review): presumably consumed by LOCAL_VARS - confirm).
 * \param n       number of pixels in the run.
 * \param x       column of the first pixel.
 * \param y       row of the run; passed through Y_FLIP() before use.
 * \param values  destination, written as an array of GLubyte[4].
 */
static void TAG2(ReadRGBASpan,_MMX)( struct gl_context *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y, void *values)
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   (void) ctx;

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x, y, n, x1, n1, i);

               {
                  const void * src = GET_PTR( x1, y );
                  /* The outer guard admits exactly two layouts, so anything
                   * that is not BGRA/8_8_8_8_REV here is RGB/5_6_5.
                   */
#if (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
                  _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
#else
                  _generic_read_RGBA_span_RGB565_MMX( src, rgba[i], n1 );
#endif
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   __asm__ __volatile__( "emms" );
#endif
}
#endif
648
649
#if defined(GET_PTR) && \
   defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE2 variant of the RGBA span reader (BGRA/8_8_8_8_REV only).
 *
 * After clipping, each pass hands the source address from GET_PTR(), the
 * destination rgba[i] and the clipped count n1 to
 * _generic_read_RGBA_span_BGRA8888_REV_SSE2().
 *
 * \param ctx     unused.
 * \param rb      not referenced directly in this function
 *                (NOTE(review): presumably consumed by LOCAL_VARS - confirm).
 * \param n       number of pixels in the run.
 * \param x       column of the first pixel.
 * \param y       row of the run; passed through Y_FLIP() before use.
 * \param values  destination, written as an array of GLubyte[4].
 */
static void TAG2(ReadRGBASpan,_SSE2)( struct gl_context *ctx,
                                      struct gl_renderbuffer *rb,
                                      GLuint n, GLint x, GLint y,
                                      void *values)
{
   (void) ctx;

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x, y, n, x1, n1, i);

               {
                  const void * src = GET_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 );
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
}
#endif
686
#if defined(GET_PTR) && \
   defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE variant of the RGBA span reader (BGRA/8_8_8_8_REV only).
 *
 * After clipping, each pass hands the source address from GET_PTR(), the
 * destination rgba[i] and the clipped count n1 to
 * _generic_read_RGBA_span_BGRA8888_REV_SSE().  Unless USE_INNER_EMMS is
 * defined, an "emms" instruction is issued on entry and on exit.
 *
 * \param ctx     unused.
 * \param rb      not referenced directly in this function
 *                (NOTE(review): presumably consumed by LOCAL_VARS - confirm).
 * \param n       number of pixels in the run.
 * \param x       column of the first pixel.
 * \param y       row of the run; passed through Y_FLIP() before use.
 * \param values  destination, written as an array of GLubyte[4].
 */
static void TAG2(ReadRGBASpan,_SSE)( struct gl_context *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y,
                                     void *values)
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   (void) ctx;

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x, y, n, x1, n1, i);

               {
                  const void * src = GET_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 );
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   __asm__ __volatile__( "emms" );
#endif
}
#endif
733
734
735 static void TAG(ReadRGBAPixels)( struct gl_context *ctx,
736 struct gl_renderbuffer *rb,
737 GLuint n, const GLint x[], const GLint y[],
738 void *values )
739 {
740 (void) ctx;
741
742 HW_READ_LOCK()
743 {
744 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
745 GLint i;
746 LOCAL_VARS;
747
748 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
749
750 HW_READ_CLIPLOOP()
751 {
752 for (i=0;i<n;i++) {
753 int fy = Y_FLIP( y[i] );
754 if (CLIPPIXEL( x[i], fy ))
755 READ_RGBA( rgba[i], x[i], fy );
756 }
757 }
758 HW_ENDCLIPLOOP();
759 }
760 HW_READ_UNLOCK();
761 }
762
763 static void TAG(InitPointers)(struct gl_renderbuffer *rb)
764 {
765 rb->PutRow = TAG(WriteRGBASpan);
766 rb->PutRowRGB = TAG(WriteRGBSpan);
767 rb->PutValues = TAG(WriteRGBAPixels);
768 rb->GetValues = TAG(ReadRGBAPixels);
769
770 #if defined(GET_PTR)
771 #if defined(USE_SSE_ASM) && \
772 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
773 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
774 if ( cpu_has_xmm2 ) {
775 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE2" );
776 rb->GetRow = TAG2(ReadRGBASpan, _SSE2);
777 }
778 else
779 #endif
780 #if defined(USE_SSE_ASM) && \
781 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
782 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
783 if ( cpu_has_xmm ) {
784 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE" );
785 rb->GetRow = TAG2(ReadRGBASpan, _SSE);
786 }
787 else
788 #endif
789 #if defined(USE_MMX_ASM) && \
790 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
791 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
792 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
793 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
794 if ( cpu_has_mmx ) {
795 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "MMX" );
796 rb->GetRow = TAG2(ReadRGBASpan, _MMX);
797 }
798 else
799 #endif
800 #endif /* GET_PTR */
801 {
802 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" );
803 rb->GetRow = TAG(ReadRGBASpan);
804 }
805
806 }
807
808
/*
 * Drop every per-instance macro so this template can be included again
 * with a different pixel format and tag.
 */

/* Template parameters supplied by the including file. */
#undef SPANTMP_PIXEL_TYPE
#undef SPANTMP_PIXEL_FMT
#undef TAG2
#undef TAG

/* Raw pixel accessors. */
#undef GET_PTR
#undef PUT_VALUE
#undef GET_VALUE

/* Format-specific pack/unpack helpers. */
#undef READ_RGBA
#undef WRITE_RGBA
#undef WRITE_PIXEL