Major check-in of changes for GL_EXT_framebuffer_object extension.
[mesa.git] / src / mesa / drivers / dri / common / spantmp2.h
1 /*
2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file spantmp2.h
28 *
29 * Template file of span read / write functions.
30 *
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
34 */
35
36 #include "colormac.h"
37
/* Debug tracing is compiled out unless DBG was already defined before this
 * template is included.
 */
#if !defined(DBG)
#define DBG 0
#endif

/* Lock, unlock and clip-loop hooks.  Each read/write-specific hook that has
 * not been defined before inclusion falls back to the corresponding generic
 * HW_LOCK() / HW_UNLOCK() / HW_CLIPLOOP() macro.
 */
#if !defined(HW_WRITE_LOCK)
#define HW_WRITE_LOCK() HW_LOCK()
#endif

#if !defined(HW_WRITE_UNLOCK)
#define HW_WRITE_UNLOCK() HW_UNLOCK()
#endif

#if !defined(HW_READ_LOCK)
#define HW_READ_LOCK() HW_LOCK()
#endif

#if !defined(HW_READ_UNLOCK)
#define HW_READ_UNLOCK() HW_UNLOCK()
#endif

#if !defined(HW_READ_CLIPLOOP)
#define HW_READ_CLIPLOOP() HW_CLIPLOOP()
#endif

#if !defined(HW_WRITE_CLIPLOOP)
#define HW_WRITE_CLIPLOOP() HW_CLIPLOOP()
#endif
65
66 #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
67
/* Pack the first three bytes of `color` (red, green, blue) into `p` using
 * PACK_COLOR_565.  Arguments are parenthesized so that pointer expressions
 * such as `base + 4` expand correctly.
 */
#define INIT_MONO_PIXEL(p, color) \
   (p) = PACK_COLOR_565( (color)[0], (color)[1], (color)[2] )
70
/* Store one pixel at (_x, _y) as RGB565: the top 5 bits of r, the top 6 bits
 * of g and the top 5 bits of b.  The alpha argument is accepted but unused.
 *
 * Each colour argument is parenthesized before the cast so that expression
 * arguments are masked as a whole, and the store goes through an unsigned
 * 16-bit pointer, matching WRITE_PIXEL.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a ) \
   do { \
      GLushort * _p = (GLushort *) GET_DST_PTR(_x, _y); \
      _p[0] = (GLushort) ((((int)(r) & 0xf8) << 8) | \
                          (((int)(g) & 0xfc) << 3) | \
                          (((int)(b) & 0xf8) >> 3)); \
   } while(0)
77
/* Store the already-packed 16-bit pixel value `p` at (_x, _y). */
#define WRITE_PIXEL( _x, _y, p ) \
   do { \
      GLushort * const _dst = (GLushort *) GET_DST_PTR(_x, _y); \
      *_dst = p; \
   } while(0)
83
/* Read the RGB565 pixel at (_x, _y) and expand it into four bytes at `rgba`.
 * Each channel is rescaled so that a fully-set field maps to 255; alpha is
 * always written as 0xff.
 *
 * `rgba` is parenthesized so pointer expressions can be passed, and the
 * pixel is loaded through an unsigned 16-bit pointer.
 */
#define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLushort p = *(volatile GLushort *) GET_SRC_PTR(_x, _y); \
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \
      (rgba)[3] = 0xff; \
   } while (0)
92
93 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
94
/* Pack `color` (bytes in R, G, B, A order) into `p` with PACK_COLOR_8888,
 * passing alpha first, then red, green and blue.  Arguments are
 * parenthesized so that pointer expressions expand correctly.
 */
# define INIT_MONO_PIXEL(p, color) \
   (p) = PACK_COLOR_8888((color)[3], (color)[0], (color)[1], (color)[2])
97
/* Store one pixel at (_x, _y) as a 32-bit word with alpha in bits 24-31,
 * red in bits 16-23, green in bits 8-15 and blue in bits 0-7.
 *
 * Every channel is converted to GLuint before shifting: a GLubyte promotes
 * to (signed) int, and shifting a value >= 128 left by 24 would overflow it.
 * Arguments are parenthesized so expression arguments shift as a whole.
 */
# define WRITE_RGBA(_x, _y, r, g, b, a) \
   do { \
      GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y); \
      _p[0] = (((GLuint)(r) << 16) | ((GLuint)(g) << 8) | \
               ((GLuint)(b) << 0) | ((GLuint)(a) << 24)); \
   } while(0)
103
/* Store the already-packed 32-bit pixel value `p` at (_x, _y). */
#define WRITE_PIXEL(_x, _y, p) \
   do { \
      GLuint * const _dst = (GLuint *) GET_DST_PTR(_x, _y); \
      *_dst = p; \
   } while(0)
109
/* READ_RGBA(rgba, _x, _y): read the 32-bit pixel at (_x, _y) -- alpha in
 * bits 24-31, red 16-23, green 8-15, blue 0-7 -- and write it to `rgba` as
 * four bytes in R, G, B, A order.  Three variants follow; all parenthesize
 * `rgba` so that pointer expressions may be passed.
 */
# if defined( USE_X86_ASM )
/* Byte-swap then rotate right by 8 so that the little-endian store below
 * lays the bytes out as R, G, B, A.  The input uses the matching constraint
 * "0" so it is guaranteed to be in the same register the instructions
 * modify; a plain "r" input may be allocated to a different register.
 */
# define READ_RGBA(rgba, _x, _y) \
   do { \
      GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y); \
      __asm__ __volatile__( "bswap %0; rorl $8, %0" \
                            : "=r" (p) : "0" (p) ); \
      ((GLuint *)(rgba))[0] = p; \
   } while (0)
# elif defined( MESA_BIG_ENDIAN )
/* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
 * rotlwi instruction. It also produces good code on SPARC.
 */
# define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y); \
      ((GLuint *)(rgba))[0] = (p >> 24) | (p << 8); \
   } while (0)
# else
/* Portable fallback: extract each channel with shifts and masks. */
# define READ_RGBA( rgba, _x, _y ) \
   do { \
      GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y); \
      (rgba)[0] = (p >> 16) & 0xff; \
      (rgba)[1] = (p >> 8) & 0xff; \
      (rgba)[2] = (p >> 0) & 0xff; \
      (rgba)[3] = (p >> 24) & 0xff; \
   } while (0)
# endif
138
139 #else
140 #error SPANTMP_PIXEL_FMT must be set to a valid value!
141 #endif
142
143 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
144 #include "x86/read_rgba_span_x86.h"
145 #include "x86/common_x86_asm.h"
146 #endif
147
148 static void TAG(WriteRGBASpan)( GLcontext *ctx,
149 struct gl_renderbuffer *rb,
150 GLuint n, GLint x, GLint y,
151 const void *values, const GLubyte mask[] )
152 {
153 HW_WRITE_LOCK()
154 {
155 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
156 GLint x1;
157 GLint n1;
158 LOCAL_VARS;
159
160 y = Y_FLIP(y);
161
162 HW_WRITE_CLIPLOOP()
163 {
164 GLint i = 0;
165 CLIPSPAN(x,y,n,x1,n1,i);
166
167 if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
168 (int)i, (int)n1, (int)x1);
169
170 if (mask)
171 {
172 for (;n1>0;i++,x1++,n1--)
173 if (mask[i])
174 WRITE_RGBA( x1, y,
175 rgba[i][0], rgba[i][1],
176 rgba[i][2], rgba[i][3] );
177 }
178 else
179 {
180 for (;n1>0;i++,x1++,n1--)
181 WRITE_RGBA( x1, y,
182 rgba[i][0], rgba[i][1],
183 rgba[i][2], rgba[i][3] );
184 }
185 }
186 HW_ENDCLIPLOOP();
187 }
188 HW_WRITE_UNLOCK();
189 }
190
191 static void TAG(WriteRGBSpan)( GLcontext *ctx,
192 struct gl_renderbuffer *rb,
193 GLuint n, GLint x, GLint y,
194 const void *values, const GLubyte mask[] )
195 {
196 HW_WRITE_LOCK()
197 {
198 const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
199 GLint x1;
200 GLint n1;
201 LOCAL_VARS;
202
203 y = Y_FLIP(y);
204
205 HW_WRITE_CLIPLOOP()
206 {
207 GLint i = 0;
208 CLIPSPAN(x,y,n,x1,n1,i);
209
210 if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
211 (int)i, (int)n1, (int)x1);
212
213 if (mask)
214 {
215 for (;n1>0;i++,x1++,n1--)
216 if (mask[i])
217 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
218 }
219 else
220 {
221 for (;n1>0;i++,x1++,n1--)
222 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
223 }
224 }
225 HW_ENDCLIPLOOP();
226 }
227 HW_WRITE_UNLOCK();
228 }
229
230 static void TAG(WriteRGBAPixels)( GLcontext *ctx,
231 struct gl_renderbuffer *rb,
232 GLuint n, const GLint x[], const GLint y[],
233 const void *values, const GLubyte mask[] )
234 {
235 HW_WRITE_LOCK()
236 {
237 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
238 GLint i;
239 LOCAL_VARS;
240
241 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
242
243 HW_WRITE_CLIPLOOP()
244 {
245 if (mask)
246 {
247 for (i=0;i<n;i++)
248 {
249 if (mask[i]) {
250 const int fy = Y_FLIP(y[i]);
251 if (CLIPPIXEL(x[i],fy))
252 WRITE_RGBA( x[i], fy,
253 rgba[i][0], rgba[i][1],
254 rgba[i][2], rgba[i][3] );
255 }
256 }
257 }
258 else
259 {
260 for (i=0;i<n;i++)
261 {
262 const int fy = Y_FLIP(y[i]);
263 if (CLIPPIXEL(x[i],fy))
264 WRITE_RGBA( x[i], fy,
265 rgba[i][0], rgba[i][1],
266 rgba[i][2], rgba[i][3] );
267 }
268 }
269 }
270 HW_ENDCLIPLOOP();
271 }
272 HW_WRITE_UNLOCK();
273 }
274
275
276 static void TAG(WriteMonoRGBASpan)( GLcontext *ctx,
277 struct gl_renderbuffer *rb,
278 GLuint n, GLint x, GLint y,
279 const void *value, const GLubyte mask[] )
280 {
281 HW_WRITE_LOCK()
282 {
283 const GLubyte *color = (const GLubyte *) value;
284 GLint x1;
285 GLint n1;
286 LOCAL_VARS;
287 INIT_MONO_PIXEL(p, color);
288
289 y = Y_FLIP( y );
290
291 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
292
293 HW_WRITE_CLIPLOOP()
294 {
295 GLint i = 0;
296 CLIPSPAN(x,y,n,x1,n1,i);
297 if (mask)
298 {
299 for (;n1>0;i++,x1++,n1--)
300 if (mask[i])
301 WRITE_PIXEL( x1, y, p );
302 }
303 else
304 {
305 for (;n1>0;i++,x1++,n1--)
306 WRITE_PIXEL( x1, y, p );
307 }
308 }
309 HW_ENDCLIPLOOP();
310 }
311 HW_WRITE_UNLOCK();
312 }
313
314
315 static void TAG(WriteMonoRGBAPixels)( GLcontext *ctx,
316 struct gl_renderbuffer *rb,
317 GLuint n,
318 const GLint x[], const GLint y[],
319 const void *value,
320 const GLubyte mask[] )
321 {
322 HW_WRITE_LOCK()
323 {
324 const GLubyte *color = (const GLubyte *) value;
325 GLint i;
326 LOCAL_VARS;
327 INIT_MONO_PIXEL(p, color);
328
329 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
330
331 HW_WRITE_CLIPLOOP()
332 {
333 if (mask)
334 {
335 for (i=0;i<n;i++)
336 if (mask[i]) {
337 int fy = Y_FLIP(y[i]);
338 if (CLIPPIXEL( x[i], fy ))
339 WRITE_PIXEL( x[i], fy, p );
340 }
341 }
342 else
343 {
344 for (i=0;i<n;i++) {
345 int fy = Y_FLIP(y[i]);
346 if (CLIPPIXEL( x[i], fy ))
347 WRITE_PIXEL( x[i], fy, p );
348 }
349 }
350 }
351 HW_ENDCLIPLOOP();
352 }
353 HW_WRITE_UNLOCK();
354 }
355
356
357 static void TAG(ReadRGBASpan)( GLcontext *ctx,
358 struct gl_renderbuffer *rb,
359 GLuint n, GLint x, GLint y, void *values)
360 {
361 HW_READ_LOCK()
362 {
363 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
364 GLint x1,n1;
365 LOCAL_VARS;
366
367 y = Y_FLIP(y);
368
369 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
370
371 HW_READ_CLIPLOOP()
372 {
373 GLint i = 0;
374 CLIPSPAN(x,y,n,x1,n1,i);
375 for (;n1>0;i++,x1++,n1--)
376 READ_RGBA( rgba[i], x1, y );
377 }
378 HW_ENDCLIPLOOP();
379 }
380 HW_READ_UNLOCK();
381 }
382
383
#if defined(USE_MMX_ASM) && \
   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
/**
 * MMX variant of the span reader: each clipped run is converted by one call
 * to the format-specific _generic_read_RGBA_span_*_MMX routine.
 *
 * The signature matches the other ReadRGBASpan variants (renderbuffer
 * pointer plus an untyped `values` destination) so that it can be stored in
 * rb->GetRow, as TAG(InitPointers) does.  The previous parameter list had no
 * `rb`, declared `rgba` as a parameter and then referenced an undeclared
 * `values` in the body.
 *
 * \param values  interpreted as an array of 4-byte tuples to fill.
 */
static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y,
                                     void *values)
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1,n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) fprintf(stderr, "ReadRGBASpan\n");

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x,y,n,x1,n1,i);

               {
                  const char * src = GET_SRC_PTR( x1, y );
#if (SPANTMP_PIXEL_FMT == GL_RGB) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
                  _generic_read_RGBA_span_RGB565_MMX( src, rgba[i], n1 );
#else
                  _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
#endif
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   /* Issue EMMS again after the MMX routine, unless USE_INNER_EMMS is set. */
   __asm__ __volatile__( "emms" );
#endif
}
#endif
433
434
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE2 variant of the span reader; only built for the BGRA /
 * 8_8_8_8_REV format.  Each clipped run is converted by one call to
 * _generic_read_RGBA_span_BGRA8888_REV_SSE2.
 *
 * \param values  interpreted as an array of 4-byte tuples to fill.
 */
static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx,
                                      struct gl_renderbuffer *rb,
                                      GLuint n, GLint x, GLint y,
                                      void *values)
{
   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x,y,n,x1,n1,i);

               /* Hand the whole clipped run to the assembly routine. */
               _generic_read_RGBA_span_BGRA8888_REV_SSE2(
                  (const char *) GET_SRC_PTR( x1, y ), rgba[i], n1 );
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
}
#endif
468
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE variant of the span reader; only built for the BGRA /
 * 8_8_8_8_REV format.  Each clipped run is converted by one call to
 * _generic_read_RGBA_span_BGRA8888_REV_SSE.  Unless USE_INNER_EMMS is
 * defined, an EMMS instruction is issued before and after the work.
 *
 * \param values  interpreted as an array of 4-byte tuples to fill.
 */
static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y,
                                     void *values)
{
#if !defined(USE_INNER_EMMS)
   /* EMMS is emitted as raw inline assembly: GCC's _mm_empty built-in was
    * found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x,y,n,x1,n1,i);

               /* Hand the whole clipped run to the assembly routine. */
               _generic_read_RGBA_span_BGRA8888_REV_SSE(
                  (const char *) GET_SRC_PTR( x1, y ), rgba[i], n1 );
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
#if !defined(USE_INNER_EMMS)
   __asm__ __volatile__( "emms" );
#endif
}
#endif
512
513
514 static void TAG(ReadRGBAPixels)( GLcontext *ctx,
515 struct gl_renderbuffer *rb,
516 GLuint n, const GLint x[], const GLint y[],
517 void *values )
518 {
519 HW_READ_LOCK()
520 {
521 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
522 GLubyte *mask = NULL; /* remove someday */
523 GLint i;
524 LOCAL_VARS;
525
526 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
527
528 HW_READ_CLIPLOOP()
529 {
530 if (mask)
531 {
532 for (i=0;i<n;i++)
533 if (mask[i]) {
534 int fy = Y_FLIP( y[i] );
535 if (CLIPPIXEL( x[i], fy ))
536 READ_RGBA( rgba[i], x[i], fy );
537 }
538 }
539 else
540 {
541 for (i=0;i<n;i++) {
542 int fy = Y_FLIP( y[i] );
543 if (CLIPPIXEL( x[i], fy ))
544 READ_RGBA( rgba[i], x[i], fy );
545 }
546 }
547 }
548 HW_ENDCLIPLOOP();
549 }
550 HW_READ_UNLOCK();
551 }
552
553 static void TAG(InitPointers)(struct gl_renderbuffer *rb)
554 {
555 rb->PutRow = TAG(WriteRGBASpan);
556 rb->PutRowRGB = TAG(WriteRGBSpan);
557 rb->PutMonoRow = TAG(WriteMonoRGBASpan);
558 rb->PutValues = TAG(WriteRGBAPixels);
559 rb->PutMonoValues = TAG(WriteMonoRGBAPixels);
560 rb->GetValues = TAG(ReadRGBAPixels);
561
562 #if defined(USE_SSE_ASM) && \
563 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
564 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
565 if ( cpu_has_xmm2 ) {
566 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE2" );
567 rb->GetRow = TAG2(ReadRGBASpan, _SSE2);
568 }
569 else
570 #endif
571 #if defined(USE_SSE_ASM) && \
572 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
573 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
574 if ( cpu_has_xmm ) {
575 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE" );
576 rb->GetRow = TAG2(ReadRGBASpan, _SSE);
577 }
578 else
579 #endif
580 #if defined(USE_MMX_ASM) && \
581 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
582 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
583 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
584 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
585 if ( cpu_has_mmx ) {
586 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "MMX" );
587 rb->GetRow = TAG2(ReadRGBASpan, _MMX);
588 }
589 else
590 #endif
591 {
592 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" );
593 rb->GetRow = TAG(ReadRGBASpan);
594 }
595
596 }
597
598
/* Remove the per-format pixel macros defined above, together with the
 * naming, pointer and format-selection macros this template consumed.
 */
#undef SPANTMP_PIXEL_TYPE
#undef SPANTMP_PIXEL_FMT
#undef GET_DST_PTR
#undef GET_SRC_PTR
#undef TAG2
#undef TAG
#undef READ_RGBA
#undef WRITE_RGBA
#undef WRITE_PIXEL
#undef INIT_MONO_PIXEL