Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / mesa / drivers / dri / common / spantmp2.h
1 /*
2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file spantmp2.h
28 *
29 * Template file of span read / write functions.
30 *
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
34 */
35
36 #include "main/colormac.h"
37 #include "spantmp_common.h"
38
/* Debug tracing (the "if (DBG) fprintf(...)" calls below) is off unless the
 * including file has already defined DBG.
 */
#if !defined(DBG)
#define DBG 0
#endif

/* Separate clip loops may be supplied for reads and for writes; when the
 * including file provides neither, both fall back to HW_CLIPLOOP().
 */
#if !defined(HW_READ_CLIPLOOP)
#define HW_READ_CLIPLOOP()	HW_CLIPLOOP()
#endif

#if !defined(HW_WRITE_CLIPLOOP)
#define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()
#endif
50
51 #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
52
/**
 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
 **/

/* Pixel access.  The including file may override GET_VALUE/PUT_VALUE (or just
 * GET_PTR); the defaults address 2-byte pixels through the names "buf" and
 * "pitch", which must be in scope wherever the macros are expanded.
 */
#ifndef GET_VALUE
#ifndef GET_PTR
#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
#endif

#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
#endif /* GET_VALUE */

/* Pack the first three components of "color" into the pixel variable "p". */
#define INIT_MONO_PIXEL(p, color)					\
   p = PACK_COLOR_565( (color)[0], (color)[1], (color)[2] )

/* Store one pixel: top 5 bits of r in bits 15..11, top 6 bits of g in bits
 * 10..5, top 5 bits of b in bits 4..0.  The alpha argument is ignored.
 * Arguments are parenthesised so the (int) cast applies to the whole
 * argument expression, and the macro no longer ends in a line continuation.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a )				\
   PUT_VALUE(_x, _y, ((((int)(r) & 0xf8) << 8) |			\
                      (((int)(g) & 0xfc) << 3) |			\
                      (((int)(b) & 0xf8) >> 3)))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p )    PUT_VALUE(_x, _y, p)

/* Load one pixel and expand it to four bytes.  Each colour field is rescaled
 * so that its maximum value maps to 255; alpha is always 0xff.
 */
#define READ_RGBA( rgba, _x, _y )					\
   do {									\
      GLushort p = GET_VALUE(_x, _y);					\
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;			\
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;			\
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8;			\
      (rgba)[3] = 0xff;							\
   } while (0)
84
85 #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
86
/**
 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
 **/

/* Pixel access.  The including file may override GET_VALUE/PUT_VALUE (or just
 * GET_PTR); the defaults address 4-byte pixels through the names "buf" and
 * "pitch", which must be in scope wherever the macros are expanded.
 */
#ifndef GET_VALUE
#ifndef GET_PTR
#define GET_PTR(_x, _y) (     buf + (_x) * 4 + (_y) * pitch)
#endif

#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
#endif /* GET_VALUE */

/* Pack "color" (indexed R,G,B,A) into the pixel variable "p". */
# define INIT_MONO_PIXEL(p, color)					\
     p = PACK_COLOR_8888((color)[3], (color)[0], (color)[1], (color)[2])

/* Store one pixel as 0xAARRGGBB.  Each component is converted to GLuint
 * before shifting: a GLubyte argument would otherwise be promoted to a signed
 * int, and shifting an alpha value >= 0x80 left by 24 overflows it.
 */
# define WRITE_RGBA(_x, _y, r, g, b, a)					\
   PUT_VALUE(_x, _y, (((GLuint)(r) << 16) |				\
                      ((GLuint)(g) << 8) |				\
                      ((GLuint)(b) << 0) |				\
                      ((GLuint)(a) << 24)))

/* Store an already packed pixel value. */
#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)

# if defined( USE_X86_ASM )
/* Byte-swap the pixel and rotate it right by 8 bits, then store the 32-bit
 * result through rgba in a single write.
 */
#  define READ_RGBA(rgba, _x, _y)                                       \
    do {                                                                \
       GLuint p = GET_VALUE(_x, _y);                                    \
       __asm__ __volatile__( "bswap	%0; rorl $8, %0"                \
				: "=r" (p) : "0" (p) );                 \
       ((GLuint *)rgba)[0] = p;                                         \
    } while (0)
# elif defined( MESA_BIG_ENDIAN )
/* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
 * rotlwi instruction.  It also produces good code on SPARC.
 */
#  define READ_RGBA( rgba, _x, _y )				\
     do {							\
        GLuint p = GET_VALUE(_x, _y);				\
        GLuint t = p;						\
        *((uint32_t *) rgba) = (t >> 24) | (p << 8);		\
     } while (0)
# else
/* Portable version: extract each byte of the 0xAARRGGBB pixel in turn. */
#  define READ_RGBA( rgba, _x, _y )				\
     do {							\
        GLuint p = GET_VALUE(_x, _y);				\
        (rgba)[0] = (p >> 16) & 0xff;				\
        (rgba)[1] = (p >> 8)  & 0xff;				\
        (rgba)[2] = (p >> 0)  & 0xff;				\
        (rgba)[3] = (p >> 24) & 0xff;				\
     } while (0)
# endif
139
140 #else
141 #error SPANTMP_PIXEL_FMT must be set to a valid value!
142 #endif
143
144
145
146 /**
147 ** Assembly routines.
148 **/
149
150 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
151 #include "x86/read_rgba_span_x86.h"
152 #include "x86/common_x86_asm.h"
153 #endif
154
155 static void TAG(WriteRGBASpan)( GLcontext *ctx,
156 struct gl_renderbuffer *rb,
157 GLuint n, GLint x, GLint y,
158 const void *values, const GLubyte mask[] )
159 {
160 HW_WRITE_LOCK()
161 {
162 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
163 GLint x1;
164 GLint n1;
165 LOCAL_VARS;
166
167 y = Y_FLIP(y);
168
169 HW_WRITE_CLIPLOOP()
170 {
171 GLint i = 0;
172 CLIPSPAN(x,y,n,x1,n1,i);
173
174 if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
175 (int)i, (int)n1, (int)x1);
176
177 if (mask)
178 {
179 for (;n1>0;i++,x1++,n1--)
180 if (mask[i])
181 WRITE_RGBA( x1, y,
182 rgba[i][0], rgba[i][1],
183 rgba[i][2], rgba[i][3] );
184 }
185 else
186 {
187 for (;n1>0;i++,x1++,n1--)
188 WRITE_RGBA( x1, y,
189 rgba[i][0], rgba[i][1],
190 rgba[i][2], rgba[i][3] );
191 }
192 }
193 HW_ENDCLIPLOOP();
194 }
195 HW_WRITE_UNLOCK();
196 }
197
198 static void TAG(WriteRGBSpan)( GLcontext *ctx,
199 struct gl_renderbuffer *rb,
200 GLuint n, GLint x, GLint y,
201 const void *values, const GLubyte mask[] )
202 {
203 HW_WRITE_LOCK()
204 {
205 const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
206 GLint x1;
207 GLint n1;
208 LOCAL_VARS;
209
210 y = Y_FLIP(y);
211
212 HW_WRITE_CLIPLOOP()
213 {
214 GLint i = 0;
215 CLIPSPAN(x,y,n,x1,n1,i);
216
217 if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
218 (int)i, (int)n1, (int)x1);
219
220 if (mask)
221 {
222 for (;n1>0;i++,x1++,n1--)
223 if (mask[i])
224 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
225 }
226 else
227 {
228 for (;n1>0;i++,x1++,n1--)
229 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
230 }
231 }
232 HW_ENDCLIPLOOP();
233 }
234 HW_WRITE_UNLOCK();
235 }
236
237 static void TAG(WriteRGBAPixels)( GLcontext *ctx,
238 struct gl_renderbuffer *rb,
239 GLuint n, const GLint x[], const GLint y[],
240 const void *values, const GLubyte mask[] )
241 {
242 HW_WRITE_LOCK()
243 {
244 const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
245 GLint i;
246 LOCAL_VARS;
247
248 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
249
250 HW_WRITE_CLIPLOOP()
251 {
252 if (mask)
253 {
254 for (i=0;i<n;i++)
255 {
256 if (mask[i]) {
257 const int fy = Y_FLIP(y[i]);
258 if (CLIPPIXEL(x[i],fy))
259 WRITE_RGBA( x[i], fy,
260 rgba[i][0], rgba[i][1],
261 rgba[i][2], rgba[i][3] );
262 }
263 }
264 }
265 else
266 {
267 for (i=0;i<n;i++)
268 {
269 const int fy = Y_FLIP(y[i]);
270 if (CLIPPIXEL(x[i],fy))
271 WRITE_RGBA( x[i], fy,
272 rgba[i][0], rgba[i][1],
273 rgba[i][2], rgba[i][3] );
274 }
275 }
276 }
277 HW_ENDCLIPLOOP();
278 }
279 HW_WRITE_UNLOCK();
280 }
281
282
283 static void TAG(WriteMonoRGBASpan)( GLcontext *ctx,
284 struct gl_renderbuffer *rb,
285 GLuint n, GLint x, GLint y,
286 const void *value, const GLubyte mask[] )
287 {
288 HW_WRITE_LOCK()
289 {
290 const GLubyte *color = (const GLubyte *) value;
291 GLint x1;
292 GLint n1;
293 LOCAL_VARS;
294 INIT_MONO_PIXEL(p, color);
295
296 y = Y_FLIP( y );
297
298 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
299
300 HW_WRITE_CLIPLOOP()
301 {
302 GLint i = 0;
303 CLIPSPAN(x,y,n,x1,n1,i);
304 if (mask)
305 {
306 for (;n1>0;i++,x1++,n1--)
307 if (mask[i])
308 WRITE_PIXEL( x1, y, p );
309 }
310 else
311 {
312 for (;n1>0;i++,x1++,n1--)
313 WRITE_PIXEL( x1, y, p );
314 }
315 }
316 HW_ENDCLIPLOOP();
317 }
318 HW_WRITE_UNLOCK();
319 }
320
321
322 static void TAG(WriteMonoRGBAPixels)( GLcontext *ctx,
323 struct gl_renderbuffer *rb,
324 GLuint n,
325 const GLint x[], const GLint y[],
326 const void *value,
327 const GLubyte mask[] )
328 {
329 HW_WRITE_LOCK()
330 {
331 const GLubyte *color = (const GLubyte *) value;
332 GLint i;
333 LOCAL_VARS;
334 INIT_MONO_PIXEL(p, color);
335
336 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
337
338 HW_WRITE_CLIPLOOP()
339 {
340 if (mask)
341 {
342 for (i=0;i<n;i++)
343 if (mask[i]) {
344 int fy = Y_FLIP(y[i]);
345 if (CLIPPIXEL( x[i], fy ))
346 WRITE_PIXEL( x[i], fy, p );
347 }
348 }
349 else
350 {
351 for (i=0;i<n;i++) {
352 int fy = Y_FLIP(y[i]);
353 if (CLIPPIXEL( x[i], fy ))
354 WRITE_PIXEL( x[i], fy, p );
355 }
356 }
357 }
358 HW_ENDCLIPLOOP();
359 }
360 HW_WRITE_UNLOCK();
361 }
362
363
364 static void TAG(ReadRGBASpan)( GLcontext *ctx,
365 struct gl_renderbuffer *rb,
366 GLuint n, GLint x, GLint y, void *values)
367 {
368 HW_READ_LOCK()
369 {
370 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
371 GLint x1,n1;
372 LOCAL_VARS;
373
374 y = Y_FLIP(y);
375
376 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
377
378 HW_READ_CLIPLOOP()
379 {
380 GLint i = 0;
381 CLIPSPAN(x,y,n,x1,n1,i);
382 for (;n1>0;i++,x1++,n1--)
383 READ_RGBA( rgba[i], x1, y );
384 }
385 HW_ENDCLIPLOOP();
386 }
387 HW_READ_UNLOCK();
388 }
389
390
#if defined(GET_PTR) && defined(USE_MMX_ASM) && \
   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
/**
 * Read a horizontal span of pixels into an RGBA byte array by calling the
 * MMX span routine for the current pixel format.
 *
 * Only built when GET_PTR is defined, because the routine is given a pointer
 * to the first pixel of the clipped span.
 *
 * \param ctx     GL context; not referenced directly in this body.
 * \param rb      renderbuffer; not referenced directly in this body.
 * \param n       number of pixels in the span.
 * \param x       column of the first pixel.
 * \param y       row of the span; passed through Y_FLIP() before use.
 * \param values  destination, written as an array of GLubyte[4].
 */
static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y, void *values)
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x,y,n,x1,n1,i);

               {
                  /* Address of the first pixel left after clipping. */
                  const void * src = GET_PTR( x1, y );
#if (SPANTMP_PIXEL_FMT == GL_RGB) && \
    (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
                  _generic_read_RGBA_span_RGB565_MMX( src, rgba[i], n1 );
#else
                  _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
#endif
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();

#ifndef USE_INNER_EMMS
   /* Issue EMMS again now that the MMX routine has run. */
   __asm__ __volatile__( "emms" );
#endif
}
#endif
441
442
#if defined(GET_PTR) && defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * Read a horizontal span of BGRA8888_REV pixels into an RGBA byte array by
 * calling the SSE2 span routine.
 *
 * Only built when GET_PTR is defined, because the routine is given a pointer
 * to the first pixel of the clipped span.
 *
 * \param ctx     GL context; not referenced directly in this body.
 * \param rb      renderbuffer; not referenced directly in this body.
 * \param n       number of pixels in the span.
 * \param x       column of the first pixel.
 * \param y       row of the span; passed through Y_FLIP() before use.
 * \param values  destination, written as an array of GLubyte[4].
 */
static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx,
                                      struct gl_renderbuffer *rb,
                                      GLuint n, GLint x, GLint y,
                                      void *values)
{
   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x,y,n,x1,n1,i);

               {
                  /* Address of the first pixel left after clipping. */
                  const void * src = GET_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 );
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
}
#endif
477
#if defined(GET_PTR) && defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * Read a horizontal span of BGRA8888_REV pixels into an RGBA byte array by
 * calling the SSE span routine.
 *
 * Only built when GET_PTR is defined, because the routine is given a pointer
 * to the first pixel of the clipped span.
 *
 * \param ctx     GL context; not referenced directly in this body.
 * \param rb      renderbuffer; not referenced directly in this body.
 * \param n       number of pixels in the span.
 * \param x       column of the first pixel.
 * \param y       row of the span; passed through Y_FLIP() before use.
 * \param values  destination, written as an array of GLubyte[4].
 */
static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx,
                                     struct gl_renderbuffer *rb,
                                     GLuint n, GLint x, GLint y,
                                     void *values)
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
         GLint x1;
         GLint n1;
         LOCAL_VARS;

         y = Y_FLIP(y);

         if (DBG) {
            fprintf(stderr, "ReadRGBASpan\n");
         }

         HW_READ_CLIPLOOP()
            {
               GLint i = 0;
               CLIPSPAN(x,y,n,x1,n1,i);

               {
                  /* Address of the first pixel left after clipping. */
                  const void * src = GET_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 );
               }
            }
         HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();

#ifndef USE_INNER_EMMS
   /* Issue EMMS again now that the span routine has run. */
   __asm__ __volatile__( "emms" );
#endif
}
#endif
522
523
524 static void TAG(ReadRGBAPixels)( GLcontext *ctx,
525 struct gl_renderbuffer *rb,
526 GLuint n, const GLint x[], const GLint y[],
527 void *values )
528 {
529 HW_READ_LOCK()
530 {
531 GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
532 GLubyte *mask = NULL; /* remove someday */
533 GLint i;
534 LOCAL_VARS;
535
536 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
537
538 HW_READ_CLIPLOOP()
539 {
540 if (mask)
541 {
542 for (i=0;i<n;i++)
543 if (mask[i]) {
544 int fy = Y_FLIP( y[i] );
545 if (CLIPPIXEL( x[i], fy ))
546 READ_RGBA( rgba[i], x[i], fy );
547 }
548 }
549 else
550 {
551 for (i=0;i<n;i++) {
552 int fy = Y_FLIP( y[i] );
553 if (CLIPPIXEL( x[i], fy ))
554 READ_RGBA( rgba[i], x[i], fy );
555 }
556 }
557 }
558 HW_ENDCLIPLOOP();
559 }
560 HW_READ_UNLOCK();
561 }
562
563 static void TAG(InitPointers)(struct gl_renderbuffer *rb)
564 {
565 rb->PutRow = TAG(WriteRGBASpan);
566 rb->PutRowRGB = TAG(WriteRGBSpan);
567 rb->PutMonoRow = TAG(WriteMonoRGBASpan);
568 rb->PutValues = TAG(WriteRGBAPixels);
569 rb->PutMonoValues = TAG(WriteMonoRGBAPixels);
570 rb->GetValues = TAG(ReadRGBAPixels);
571
572 #if defined(GET_PTR)
573 #if defined(USE_SSE_ASM) && \
574 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
575 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
576 if ( cpu_has_xmm2 ) {
577 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE2" );
578 rb->GetRow = TAG2(ReadRGBASpan, _SSE2);
579 }
580 else
581 #endif
582 #if defined(USE_SSE_ASM) && \
583 (SPANTMP_PIXEL_FMT == GL_BGRA) && \
584 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
585 if ( cpu_has_xmm ) {
586 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE" );
587 rb->GetRow = TAG2(ReadRGBASpan, _SSE);
588 }
589 else
590 #endif
591 #if defined(USE_MMX_ASM) && \
592 (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
593 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
594 ((SPANTMP_PIXEL_FMT == GL_RGB) && \
595 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
596 if ( cpu_has_mmx ) {
597 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "MMX" );
598 rb->GetRow = TAG2(ReadRGBASpan, _MMX);
599 }
600 else
601 #endif
602 #endif /* GET_PTR */
603 {
604 if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" );
605 rb->GetRow = TAG(ReadRGBASpan);
606 }
607
608 }
609
610
/* Drop the per-instantiation macros -- presumably so that this template can
 * be included again with a different TAG and pixel format.
 */

/* Template parameters. */
#undef SPANTMP_PIXEL_FMT
#undef SPANTMP_PIXEL_TYPE
#undef TAG
#undef TAG2

/* Raw pixel access. */
#undef GET_PTR
#undef GET_VALUE
#undef PUT_VALUE

/* Pixel packing and unpacking. */
#undef INIT_MONO_PIXEL
#undef WRITE_PIXEL
#undef WRITE_RGBA
#undef READ_RGBA