Merge branch '965-glsl'
[mesa.git] / src / mesa / drivers / dri / common / spantmp2.h
1 /*
2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file spantmp2.h
28 *
29 * Template file of span read / write functions.
30 *
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
34 */
35
36 #include "colormac.h"
37 #include "spantmp_common.h"
38
/* Debug tracing is off unless DBG was already defined before this point. */
#if !defined(DBG)
#define DBG 0
#endif

/* Separate clip loops may be provided for reads and for writes; when they
 * are not, both fall back to HW_CLIPLOOP().
 */
#if !defined(HW_READ_CLIPLOOP)
#define HW_READ_CLIPLOOP()	HW_CLIPLOOP()
#endif

#if !defined(HW_WRITE_CLIPLOOP)
#define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()
#endif
50
51
52 #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
53
54 /**
55 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
56 **/
57
/* Address of pixel (_x, _y): 2 bytes per pixel, `pitch` bytes per row.
 * `buf` and `pitch` must be in scope at the point of use.
 */
#ifndef GET_PTR
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

/* Pack color[0..2] into a 565 pixel stored in `p` (used by the mono-color
 * writers).
 */
#define INIT_MONO_PIXEL(p, color) \
   p = PACK_COLOR_565( (color)[0], (color)[1], (color)[2] )

/* Store one pixel from separate 8-bit channels; the low bits of each
 * channel are dropped and `a` is ignored because 565 has no alpha.
 * The pixel is unsigned, so it is written through a GLushort pointer, and
 * every argument is parenthesized so expression arguments are masked as a
 * whole.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a )				\
   do {									\
      GLushort * _p = (GLushort *) GET_PTR(_x, _y);			\
      _p[0] = (GLushort) (((((int) (r)) & 0xf8) << 8) |		\
			  ((((int) (g)) & 0xfc) << 3) |		\
			  ((((int) (b)) & 0xf8) >> 3));		\
   } while(0)

/* Store an already packed 565 pixel. */
#define WRITE_PIXEL( _x, _y, p )					\
   do {									\
      GLushort * _p = (GLushort *) GET_PTR(_x, _y);			\
      _p[0] = (p);							\
   } while(0)

/* Load one pixel and expand it to four 8-bit channels.  Each channel is
 * rescaled so that its maximum field value maps to 255; alpha is always
 * 0xff.
 */
#define READ_RGBA( rgba, _x, _y )					\
   do {									\
      GLushort p = *(volatile GLushort *) GET_PTR(_x, _y);		\
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;			\
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;			\
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8;			\
      (rgba)[3] = 0xff;							\
   } while (0)
86
87 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
88
89 /**
90 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
91 **/
92
/* Address of pixel (_x, _y): 4 bytes per pixel, `pitch` bytes per row.
 * `buf` and `pitch` must be in scope at the point of use.
 */
#ifndef GET_PTR
#define GET_PTR(_x, _y) (buf + (_x) * 4 + (_y) * pitch)
#endif

/* Pack color[0..3] (R, G, B, A) into `p` for the mono-color writers. */
# define INIT_MONO_PIXEL(p, color) \
   p = PACK_COLOR_8888((color)[3], (color)[0], (color)[1], (color)[2])

/* Store one pixel as 0xAARRGGBB.  Each channel is widened to GLuint
 * before shifting: shifting a promoted int left by 24 would move alpha
 * values >= 0x80 into the sign bit, which is undefined behaviour.
 */
# define WRITE_RGBA(_x, _y, r, g, b, a)				\
   do {									\
      GLuint * _p = (GLuint *) GET_PTR(_x, _y);			\
      _p[0] = (((GLuint) (r) << 16) | ((GLuint) (g) <<  8) |		\
	       ((GLuint) (b) <<  0) | ((GLuint) (a) << 24));		\
   } while(0)

/* Store an already packed pixel. */
#define WRITE_PIXEL(_x, _y, p)						\
   do {									\
      GLuint * _p = (GLuint *) GET_PTR(_x, _y);			\
      _p[0] = (p);							\
   } while(0)

/* Load one pixel and store it into rgba[0..3] in R, G, B, A byte order.
 * The x86 and big-endian variants do this with a single 32-bit store
 * through a GLuint pointer into rgba.
 */
# if defined( USE_X86_ASM )
#  define READ_RGBA(rgba, _x, _y)					\
    do {								\
       GLuint p = *(volatile GLuint *) GET_PTR(_x, _y);		\
       __asm__ __volatile__( "bswap	%0; rorl $8, %0"		\
				: "=r" (p) : "0" (p) );		\
       ((GLuint *)(rgba))[0] = p;					\
    } while (0)
# elif defined( MESA_BIG_ENDIAN )
    /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
     * rotlwi instruction.  It also produces good code on SPARC.
     */
#  define READ_RGBA( rgba, _x, _y )					\
     do {								\
        GLuint p = *(volatile GLuint *) GET_PTR(_x, _y);		\
        GLuint t = p;							\
        *((GLuint *) (rgba)) = (t >> 24) | (p << 8);			\
     } while (0)
# else
#  define READ_RGBA( rgba, _x, _y )					\
     do {								\
        GLuint p = *(volatile GLuint *) GET_PTR(_x, _y);		\
	(rgba)[0] = (p >> 16) & 0xff;					\
	(rgba)[1] = (p >>  8) & 0xff;					\
	(rgba)[2] = (p >>  0) & 0xff;					\
	(rgba)[3] = (p >> 24) & 0xff;					\
     } while (0)
# endif
140
141 #else
142 #error SPANTMP_PIXEL_FMT must be set to a valid value!
143 #endif
144
145
146
147 /**
148 ** Assembly routines.
149 **/
150
151 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
152 #include "x86/read_rgba_span_x86.h"
153 #include "x86/common_x86_asm.h"
154 #endif
155
156 static void TAG(WriteRGBASpan)( GLcontext *ctx,
157 struct gl_renderbuffer *rb,
158 GLuint n, GLint x, GLint y,
159 const void *values, const GLubyte mask[] )
160 {
161 HW_WRITE_LOCK()
162 {
163 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
164 GLint x1;
165 GLint n1;
166 LOCAL_VARS;
167
168 y = Y_FLIP(y);
169
170 HW_WRITE_CLIPLOOP()
171 {
172 GLint i = 0;
173 CLIPSPAN(x,y,n,x1,n1,i);
174
175 if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
176 (int)i, (int)n1, (int)x1);
177
178 if (mask)
179 {
180 for (;n1>0;i++,x1++,n1--)
181 if (mask[i])
182 WRITE_RGBA( x1, y,
183 rgba[i][0], rgba[i][1],
184 rgba[i][2], rgba[i][3] );
185 }
186 else
187 {
188 for (;n1>0;i++,x1++,n1--)
189 WRITE_RGBA( x1, y,
190 rgba[i][0], rgba[i][1],
191 rgba[i][2], rgba[i][3] );
192 }
193 }
194 HW_ENDCLIPLOOP();
195 }
196 HW_WRITE_UNLOCK();
197 }
198
199 static void TAG(WriteRGBSpan)( GLcontext *ctx,
200 struct gl_renderbuffer *rb,
201 GLuint n, GLint x, GLint y,
202 const void *values, const GLubyte mask[] )
203 {
204 HW_WRITE_LOCK()
205 {
206 const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
207 GLint x1;
208 GLint n1;
209 LOCAL_VARS;
210
211 y = Y_FLIP(y);
212
213 HW_WRITE_CLIPLOOP()
214 {
215 GLint i = 0;
216 CLIPSPAN(x,y,n,x1,n1,i);
217
218 if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
219 (int)i, (int)n1, (int)x1);
220
221 if (mask)
222 {
223 for (;n1>0;i++,x1++,n1--)
224 if (mask[i])
225 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
226 }
227 else
228 {
229 for (;n1>0;i++,x1++,n1--)
230 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
231 }
232 }
233 HW_ENDCLIPLOOP();
234 }
235 HW_WRITE_UNLOCK();
236 }
237
238 static void TAG(WriteRGBAPixels)( GLcontext *ctx,
239 struct gl_renderbuffer *rb,
240 GLuint n, const GLint x[], const GLint y[],
241 const void *values, const GLubyte mask[] )
242 {
243 HW_WRITE_LOCK()
244 {
245 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
246 GLint i;
247 LOCAL_VARS;
248
249 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
250
251 HW_WRITE_CLIPLOOP()
252 {
253 if (mask)
254 {
255 for (i=0;i<n;i++)
256 {
257 if (mask[i]) {
258 const int fy = Y_FLIP(y[i]);
259 if (CLIPPIXEL(x[i],fy))
260 WRITE_RGBA( x[i], fy,
261 rgba[i][0], rgba[i][1],
262 rgba[i][2], rgba[i][3] );
263 }
264 }
265 }
266 else
267 {
268 for (i=0;i<n;i++)
269 {
270 const int fy = Y_FLIP(y[i]);
271 if (CLIPPIXEL(x[i],fy))
272 WRITE_RGBA( x[i], fy,
273 rgba[i][0], rgba[i][1],
274 rgba[i][2], rgba[i][3] );
275 }
276 }
277 }
278 HW_ENDCLIPLOOP();
279 }
280 HW_WRITE_UNLOCK();
281 }
282
283
284 static void TAG(WriteMonoRGBASpan)( GLcontext *ctx,
285 struct gl_renderbuffer *rb,
286 GLuint n, GLint x, GLint y,
287 const void *value, const GLubyte mask[] )
288 {
289 HW_WRITE_LOCK()
290 {
291 const GLubyte *color = (const GLubyte *) value;
292 GLint x1;
293 GLint n1;
294 LOCAL_VARS;
295 INIT_MONO_PIXEL(p, color);
296
297 y = Y_FLIP( y );
298
299 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
300
301 HW_WRITE_CLIPLOOP()
302 {
303 GLint i = 0;
304 CLIPSPAN(x,y,n,x1,n1,i);
305 if (mask)
306 {
307 for (;n1>0;i++,x1++,n1--)
308 if (mask[i])
309 WRITE_PIXEL( x1, y, p );
310 }
311 else
312 {
313 for (;n1>0;i++,x1++,n1--)
314 WRITE_PIXEL( x1, y, p );
315 }
316 }
317 HW_ENDCLIPLOOP();
318 }
319 HW_WRITE_UNLOCK();
320 }
321
322
323 static void TAG(WriteMonoRGBAPixels)( GLcontext *ctx,
324 struct gl_renderbuffer *rb,
325 GLuint n,
326 const GLint x[], const GLint y[],
327 const void *value,
328 const GLubyte mask[] )
329 {
330 HW_WRITE_LOCK()
331 {
332 const GLubyte *color = (const GLubyte *) value;
333 GLint i;
334 LOCAL_VARS;
335 INIT_MONO_PIXEL(p, color);
336
337 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
338
339 HW_WRITE_CLIPLOOP()
340 {
341 if (mask)
342 {
343 for (i=0;i<n;i++)
344 if (mask[i]) {
345 int fy = Y_FLIP(y[i]);
346 if (CLIPPIXEL( x[i], fy ))
347 WRITE_PIXEL( x[i], fy, p );
348 }
349 }
350 else
351 {
352 for (i=0;i<n;i++) {
353 int fy = Y_FLIP(y[i]);
354 if (CLIPPIXEL( x[i], fy ))
355 WRITE_PIXEL( x[i], fy, p );
356 }
357 }
358 }
359 HW_ENDCLIPLOOP();
360 }
361 HW_WRITE_UNLOCK();
362 }
363
364
365 static void TAG(ReadRGBASpan)( GLcontext *ctx,
366 struct gl_renderbuffer *rb,
367 GLuint n, GLint x, GLint y, void *values)
368 {
369 HW_READ_LOCK()
370 {
371 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
372 GLint x1,n1;
373 LOCAL_VARS;
374
375 y = Y_FLIP(y);
376
377 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
378
379 HW_READ_CLIPLOOP()
380 {
381 GLint i = 0;
382 CLIPSPAN(x,y,n,x1,n1,i);
383 for (;n1>0;i++,x1++,n1--)
384 READ_RGBA( rgba[i], x1, y );
385 }
386 HW_ENDCLIPLOOP();
387 }
388 HW_READ_UNLOCK();
389 }
390
391
#if defined(USE_MMX_ASM) && \
   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
/**
 * MMX variant of the span reader.
 *
 * For each clip-loop iteration the span selected by CLIPSPAN is handed in
 * one call to the MMX conversion routine that matches the pixel format.
 * Unless USE_INNER_EMMS is defined, an \c emms instruction is issued on
 * entry and again on exit.
 *
 * \param n       number of pixels in the span
 * \param values  destination array of \c n RGBA ubyte quadruples
 */
static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y, void *values)
{
#if !defined(USE_INNER_EMMS)
   /* emms is written as inline assembly on purpose: GCC's built-in
    * _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1,n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) fprintf(stderr, "ReadRGBASpan\n");

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               const void * src;

               CLIPSPAN(x,y,n,x1,n1,i);

               /* First source pixel of the span for this iteration. */
               src = GET_PTR( x1, y );
#if (SPANTMP_PIXEL_FMT == GL_RGB) && \
		(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
               _generic_read_RGBA_span_RGB565_MMX( src, rgba[i], n1 );
#else
               _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
#endif
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();

#if !defined(USE_INNER_EMMS)
   __asm__ __volatile__( "emms" );
#endif
}
#endif
441
442
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE2 variant of the span reader (BGRA8888_REV only).
 *
 * For each clip-loop iteration the span selected by CLIPSPAN is handed in
 * one call to _generic_read_RGBA_span_BGRA8888_REV_SSE2().
 *
 * \param n       number of pixels in the span
 * \param values  destination array of \c n RGBA ubyte quadruples
 */
static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx,
                                      struct gl_renderbuffer *rb,
                                      GLuint n, GLint x, GLint y,
                                      void *values)
{
   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1,n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) fprintf(stderr, "ReadRGBASpan\n");

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               const void * src;

               CLIPSPAN(x,y,n,x1,n1,i);

               /* First source pixel of the span for this iteration. */
               src = GET_PTR( x1, y );
               _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 );
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
}
#endif
476
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE variant of the span reader (BGRA8888_REV only).
 *
 * For each clip-loop iteration the span selected by CLIPSPAN is handed in
 * one call to _generic_read_RGBA_span_BGRA8888_REV_SSE().  Unless
 * USE_INNER_EMMS is defined, an \c emms instruction is issued on entry and
 * again on exit.
 *
 * \param n       number of pixels in the span
 * \param values  destination array of \c n RGBA ubyte quadruples
 */
static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y,
                                     void *values)
{
#if !defined(USE_INNER_EMMS)
   /* emms is written as inline assembly on purpose: GCC's built-in
    * _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1,n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) fprintf(stderr, "ReadRGBASpan\n");

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               const void * src;

               CLIPSPAN(x,y,n,x1,n1,i);

               /* First source pixel of the span for this iteration. */
               src = GET_PTR( x1, y );
               _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 );
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();

#if !defined(USE_INNER_EMMS)
   __asm__ __volatile__( "emms" );
#endif
}
#endif
520
521
522 static void TAG(ReadRGBAPixels)( GLcontext *ctx,
523 struct gl_renderbuffer *rb,
524 GLuint n, const GLint x[], const GLint y[],
525 void *values )
526 {
527 HW_READ_LOCK()
528 {
529 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
530 GLubyte *mask = NULL; /* remove someday */
531 GLint i;
532 LOCAL_VARS;
533
534 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
535
536 HW_READ_CLIPLOOP()
537 {
538 if (mask)
539 {
540 for (i=0;i<n;i++)
541 if (mask[i]) {
542 int fy = Y_FLIP( y[i] );
543 if (CLIPPIXEL( x[i], fy ))
544 READ_RGBA( rgba[i], x[i], fy );
545 }
546 }
547 else
548 {
549 for (i=0;i<n;i++) {
550 int fy = Y_FLIP( y[i] );
551 if (CLIPPIXEL( x[i], fy ))
552 READ_RGBA( rgba[i], x[i], fy );
553 }
554 }
555 }
556 HW_ENDCLIPLOOP();
557 }
558 HW_READ_UNLOCK();
559 }
560
561 static void TAG(InitPointers)(struct gl_renderbuffer *rb)
562 {
563 rb->PutRow = TAG(WriteRGBASpan);
564 rb->PutRowRGB = TAG(WriteRGBSpan);
565 rb->PutMonoRow = TAG(WriteMonoRGBASpan);
566 rb->PutValues = TAG(WriteRGBAPixels);
567 rb->PutMonoValues = TAG(WriteMonoRGBAPixels);
568 rb->GetValues = TAG(ReadRGBAPixels);
569
570 #if defined(USE_SSE_ASM) && \
571 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
572 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
573 if ( cpu_has_xmm2 ) {
574 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE2" );
575 rb->GetRow = TAG2(ReadRGBASpan, _SSE2);
576 }
577 else
578 #endif
579 #if defined(USE_SSE_ASM) && \
580 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
581 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
582 if ( cpu_has_xmm ) {
583 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE" );
584 rb->GetRow = TAG2(ReadRGBASpan, _SSE);
585 }
586 else
587 #endif
588 #if defined(USE_MMX_ASM) && \
589 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
590 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
591 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
592 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
593 if ( cpu_has_mmx ) {
594 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "MMX" );
595 rb->GetRow = TAG2(ReadRGBASpan, _MMX);
596 }
597 else
598 #endif
599 {
600 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" );
601 rb->GetRow = TAG(ReadRGBASpan);
602 }
603
604 }
605
606
/* Drop the per-format pixel access macros ... */
#undef READ_RGBA
#undef WRITE_RGBA
#undef WRITE_PIXEL
#undef INIT_MONO_PIXEL
#undef GET_PTR

/* ... the function-naming macros ... */
#undef TAG2
#undef TAG

/* ... and the format selectors tested by the #if chain near the top of
 * this file (presumably so the template can be included again with a
 * different format).
 */
#undef SPANTMP_PIXEL_TYPE
#undef SPANTMP_PIXEL_FMT