Fixed off-by-one errors in clipping.
[mesa.git] / src / mesa / drivers / dri / common / spantmp2.h
1 /*
2 * Copyright 2000-2001 VA Linux Systems, Inc.
3 * (C) Copyright IBM Corporation 2004
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file spantmp2.h
28 *
29 * Template file of span read / write functions.
30 *
31 * \author Keith Whitwell <keithw@tungstengraphics.com>
32 * \author Gareth Hughes <gareth@nvidia.com>
33 * \author Ian Romanick <idr@us.ibm.com>
34 */
35
36 #include "colormac.h"
37
/* Debug tracing stays disabled unless DBG was already defined. */
#if !defined(DBG)
#  define DBG 0
#endif

/*
 * Read-side hooks.  Each one falls back to the generic HW_LOCK /
 * HW_UNLOCK / HW_CLIPLOOP macro (not defined in this file) when no
 * read-specific override was defined beforehand.
 */
#if !defined(HW_READ_LOCK)
#  define HW_READ_LOCK()	HW_LOCK()
#endif

#if !defined(HW_READ_UNLOCK)
#  define HW_READ_UNLOCK()	HW_UNLOCK()
#endif

#if !defined(HW_READ_CLIPLOOP)
#  define HW_READ_CLIPLOOP()	HW_CLIPLOOP()
#endif

/*
 * Write-side hooks, with the same fallback to the generic macros.
 */
#if !defined(HW_WRITE_LOCK)
#  define HW_WRITE_LOCK()	HW_LOCK()
#endif

#if !defined(HW_WRITE_UNLOCK)
#  define HW_WRITE_UNLOCK()	HW_UNLOCK()
#endif

#if !defined(HW_WRITE_CLIPLOOP)
#  define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()
#endif
65
/*
 * Per-format pixel access macros.
 *
 * SPANTMP_PIXEL_FMT / SPANTMP_PIXEL_TYPE select one of the layouts below.
 * Every variant provides:
 *
 *   INIT_MONO_PIXEL(p, color)      - pack a constant color into `p`.
 *   WRITE_RGBA(_x, _y, r, g, b, a) - pack the channels and store them at
 *                                    GET_DST_PTR(_x, _y).
 *   WRITE_PIXEL(_x, _y, p)         - store an already packed pixel.
 *   READ_RGBA(rgba, _x, _y)        - load from GET_SRC_PTR(_x, _y) and
 *                                    unpack into rgba[0..3] as R, G, B, A.
 *
 * GET_DST_PTR, GET_SRC_PTR and the PACK_COLOR_* helpers are not defined
 * in this file.
 */
#if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)

#define INIT_MONO_PIXEL(p, color) \
   p = PACK_COLOR_565( (color)[0], (color)[1], (color)[2] )

/* 5-6-5 packing: red in bits 15..11, green in 10..5, blue in 4..0.
 * The alpha argument is ignored.  The destination is accessed as an
 * unsigned 16-bit value so that a set red MSB is stored without relying
 * on an implementation-defined conversion to a signed short.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a )				\
   do {									\
      GLushort * _p = (GLushort *) GET_DST_PTR(_x, _y);		\
      _p[0] = (GLushort) ((((int)(r) & 0xf8) << 8) |			\
			  (((int)(g) & 0xfc) << 3) |			\
			  (((int)(b) & 0xf8) >> 3));			\
   } while(0)

#define WRITE_PIXEL( _x, _y, p )					\
   do {									\
      GLushort * _p = (GLushort *) GET_DST_PTR(_x, _y);		\
      _p[0] = (p);							\
   } while(0)

/* Each channel is rescaled so that its maximum field value maps to 255;
 * alpha always reads back as 0xff.
 */
#define READ_RGBA( rgba, _x, _y )					\
   do {									\
      GLushort p = *(volatile GLushort *) GET_SRC_PTR(_x, _y);		\
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;			\
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;			\
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8;			\
      (rgba)[3] = 0xff;							\
   } while (0)

#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)

# define INIT_MONO_PIXEL(p, color) \
   p = PACK_COLOR_8888((color)[3], (color)[0], (color)[1], (color)[2])

/* Packed as 0xAARRGGBB.  Every channel is widened to GLuint before it is
 * shifted: an int-promoted byte (or the literal 255) shifted left by 24
 * would overflow a signed int, which is undefined behaviour.
 */
# define WRITE_RGBA(_x, _y, r, g, b, a)					\
   do {									\
      GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y);			\
      _p[0] = (((GLuint)(r) << 16) | ((GLuint)(g) <<  8) |		\
	       ((GLuint)(b) <<  0) | ((GLuint)(a) << 24));		\
   } while(0)

#define WRITE_PIXEL(_x, _y, p)						\
   do {									\
      GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y);			\
      _p[0] = (p);							\
   } while(0)

# if defined( USE_X86_ASM )
/* bswap + rotate turns 0xAARRGGBB into 0xAABBGGRR, which a little-endian
 * 32-bit store lays out in memory as R, G, B, A.
 *
 * The input uses the matching constraint "0" so that it is loaded into
 * the same register as output %0.  With a plain "r" input the compiler is
 * free to place the value in a different register, and the template,
 * which only mentions %0, would then operate on garbage.
 *
 * NOTE(review): the 32-bit store assumes `rgba` may be written through a
 * GLuint pointer (alignment / aliasing) - confirm against callers.
 */
# define READ_RGBA(rgba, _x, _y)					\
   do {									\
      GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y);		\
      __asm__ __volatile__( "bswap %0; rorl $8, %0"			\
			    : "=r" (p) : "0" (p) );			\
      ((GLuint *)(rgba))[0] = p;					\
   } while (0)
# else
# define READ_RGBA( rgba, _x, _y )					\
   do {									\
      GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y);		\
      (rgba)[0] = (p >> 16) & 0xff;					\
      (rgba)[1] = (p >>  8) & 0xff;					\
      (rgba)[2] = (p >>  0) & 0xff;					\
      (rgba)[3] = (p >> 24) & 0xff;					\
   } while (0)
# endif

#else
#error SPANTMP_PIXEL_FMT must be set to a valid value!
#endif
132
133 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
134 #include "x86/read_rgba_span_x86.h"
135 #include "x86/common_x86_asm.h"
136 #endif
137
138 static void TAG(WriteRGBASpan)( const GLcontext *ctx,
139 GLuint n, GLint x, GLint y,
140 const GLubyte rgba[][4],
141 const GLubyte mask[] )
142 {
143 HW_WRITE_LOCK()
144 {
145 GLint x1;
146 GLint n1;
147 LOCAL_VARS;
148
149 y = Y_FLIP(y);
150
151 HW_WRITE_CLIPLOOP()
152 {
153 GLint i = 0;
154 CLIPSPAN(x,y,n,x1,n1,i);
155
156 if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
157 (int)i, (int)n1, (int)x1);
158
159 if (mask)
160 {
161 for (;n1>0;i++,x1++,n1--)
162 if (mask[i])
163 WRITE_RGBA( x1, y,
164 rgba[i][0], rgba[i][1],
165 rgba[i][2], rgba[i][3] );
166 }
167 else
168 {
169 for (;n1>0;i++,x1++,n1--)
170 WRITE_RGBA( x1, y,
171 rgba[i][0], rgba[i][1],
172 rgba[i][2], rgba[i][3] );
173 }
174 }
175 HW_ENDCLIPLOOP();
176 }
177 HW_WRITE_UNLOCK();
178 }
179
180 static void TAG(WriteRGBSpan)( const GLcontext *ctx,
181 GLuint n, GLint x, GLint y,
182 const GLubyte rgb[][3],
183 const GLubyte mask[] )
184 {
185 HW_WRITE_LOCK()
186 {
187 GLint x1;
188 GLint n1;
189 LOCAL_VARS;
190
191 y = Y_FLIP(y);
192
193 HW_WRITE_CLIPLOOP()
194 {
195 GLint i = 0;
196 CLIPSPAN(x,y,n,x1,n1,i);
197
198 if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
199 (int)i, (int)n1, (int)x1);
200
201 if (mask)
202 {
203 for (;n1>0;i++,x1++,n1--)
204 if (mask[i])
205 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
206 }
207 else
208 {
209 for (;n1>0;i++,x1++,n1--)
210 WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
211 }
212 }
213 HW_ENDCLIPLOOP();
214 }
215 HW_WRITE_UNLOCK();
216 }
217
218 static void TAG(WriteRGBAPixels)( const GLcontext *ctx,
219 GLuint n,
220 const GLint x[],
221 const GLint y[],
222 const GLubyte rgba[][4],
223 const GLubyte mask[] )
224 {
225 HW_WRITE_LOCK()
226 {
227 GLint i;
228 LOCAL_VARS;
229
230 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
231
232 HW_WRITE_CLIPLOOP()
233 {
234 if (mask)
235 {
236 for (i=0;i<n;i++)
237 {
238 if (mask[i]) {
239 const int fy = Y_FLIP(y[i]);
240 if (CLIPPIXEL(x[i],fy))
241 WRITE_RGBA( x[i], fy,
242 rgba[i][0], rgba[i][1],
243 rgba[i][2], rgba[i][3] );
244 }
245 }
246 }
247 else
248 {
249 for (i=0;i<n;i++)
250 {
251 const int fy = Y_FLIP(y[i]);
252 if (CLIPPIXEL(x[i],fy))
253 WRITE_RGBA( x[i], fy,
254 rgba[i][0], rgba[i][1],
255 rgba[i][2], rgba[i][3] );
256 }
257 }
258 }
259 HW_ENDCLIPLOOP();
260 }
261 HW_WRITE_UNLOCK();
262 }
263
264
265 static void TAG(WriteMonoRGBASpan)( const GLcontext *ctx,
266 GLuint n, GLint x, GLint y,
267 const GLchan color[4],
268 const GLubyte mask[] )
269 {
270 HW_WRITE_LOCK()
271 {
272 GLint x1;
273 GLint n1;
274 LOCAL_VARS;
275 INIT_MONO_PIXEL(p, color);
276
277 y = Y_FLIP( y );
278
279 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
280
281 HW_WRITE_CLIPLOOP()
282 {
283 GLint i = 0;
284 CLIPSPAN(x,y,n,x1,n1,i);
285 if (mask)
286 {
287 for (;n1>0;i++,x1++,n1--)
288 if (mask[i])
289 WRITE_PIXEL( x1, y, p );
290 }
291 else
292 {
293 for (;n1>0;i++,x1++,n1--)
294 WRITE_PIXEL( x1, y, p );
295 }
296 }
297 HW_ENDCLIPLOOP();
298 }
299 HW_WRITE_UNLOCK();
300 }
301
302
303 static void TAG(WriteMonoRGBAPixels)( const GLcontext *ctx,
304 GLuint n,
305 const GLint x[], const GLint y[],
306 const GLchan color[],
307 const GLubyte mask[] )
308 {
309 HW_WRITE_LOCK()
310 {
311 GLint i;
312 LOCAL_VARS;
313 INIT_MONO_PIXEL(p, color);
314
315 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
316
317 HW_WRITE_CLIPLOOP()
318 {
319 if (mask)
320 {
321 for (i=0;i<n;i++)
322 if (mask[i]) {
323 int fy = Y_FLIP(y[i]);
324 if (CLIPPIXEL( x[i], fy ))
325 WRITE_PIXEL( x[i], fy, p );
326 }
327 }
328 else
329 {
330 for (i=0;i<n;i++) {
331 int fy = Y_FLIP(y[i]);
332 if (CLIPPIXEL( x[i], fy ))
333 WRITE_PIXEL( x[i], fy, p );
334 }
335 }
336 }
337 HW_ENDCLIPLOOP();
338 }
339 HW_WRITE_UNLOCK();
340 }
341
342
343 static void TAG(ReadRGBASpan)( const GLcontext *ctx,
344 GLuint n, GLint x, GLint y,
345 GLubyte rgba[][4])
346 {
347 HW_READ_LOCK()
348 {
349 GLint x1,n1;
350 LOCAL_VARS;
351
352 y = Y_FLIP(y);
353
354 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
355
356 HW_READ_CLIPLOOP()
357 {
358 GLint i = 0;
359 CLIPSPAN(x,y,n,x1,n1,i);
360 for (;n1>0;i++,x1++,n1--)
361 READ_RGBA( rgba[i], x1, y );
362 }
363 HW_ENDCLIPLOOP();
364 }
365 HW_READ_UNLOCK();
366 }
367
368
#if defined(USE_MMX_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * Read a horizontal span of BGRA8888 pixels into an RGBA array using the
 * MMX conversion routine.
 *
 * Uses the same HW_READ_LOCK / HW_READ_UNLOCK hooks as the plain C
 * TAG(ReadRGBASpan), since both are installed into the same
 * swdd->ReadRGBASpan slot by TAG(InitPointers).
 *
 * \param ctx   GL context (not referenced directly here).
 * \param n     number of pixels in the span.
 * \param x     column of the first pixel.
 * \param y     row of the span; passed through Y_FLIP before use.
 * \param rgba  destination array.
 */
static void TAG2(ReadRGBASpan,_MMX)( const GLcontext *ctx,
				     GLuint n, GLint x, GLint y,
				     GLubyte rgba[][4])
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
	 GLint x1,n1;
	 LOCAL_VARS;

	 y = Y_FLIP(y);

	 if (DBG) fprintf(stderr, "ReadRGBASpan\n");

	 HW_READ_CLIPLOOP()
	    {
	       GLint i = 0;
	       CLIPSPAN(x,y,n,x1,n1,i);

	       /* Only convert when something survived clipping, matching
		* the `n1 > 0` loop condition of the C version.
		* NOTE(review): the asm routine's handling of a zero or
		* negative count is not visible here, so it is never handed
		* one.
		*/
	       if (n1 > 0) {
		  const char * src = GET_SRC_PTR( x1, y );
		  _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
	       }
	    }
	 HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   __asm__ __volatile__( "emms" );
#endif
}
#endif
410
411
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * Read a horizontal span of BGRA8888 pixels into an RGBA array using the
 * SSE2 conversion routine.
 *
 * Uses the same HW_READ_LOCK / HW_READ_UNLOCK hooks as the plain C
 * TAG(ReadRGBASpan), since both are installed into the same
 * swdd->ReadRGBASpan slot by TAG(InitPointers).
 *
 * \param ctx   GL context (not referenced directly here).
 * \param n     number of pixels in the span.
 * \param x     column of the first pixel.
 * \param y     row of the span; passed through Y_FLIP before use.
 * \param rgba  destination array.
 */
static void TAG2(ReadRGBASpan,_SSE2)( const GLcontext *ctx,
				      GLuint n, GLint x, GLint y,
				      GLubyte rgba[][4])
{
   HW_READ_LOCK()
      {
	 GLint x1,n1;
	 LOCAL_VARS;

	 y = Y_FLIP(y);

	 if (DBG) fprintf(stderr, "ReadRGBASpan\n");

	 HW_READ_CLIPLOOP()
	    {
	       GLint i = 0;
	       CLIPSPAN(x,y,n,x1,n1,i);

	       /* Only convert when something survived clipping, matching
		* the `n1 > 0` loop condition of the C version.
		* NOTE(review): the asm routine's handling of a zero or
		* negative count is not visible here, so it is never handed
		* one.
		*/
	       if (n1 > 0) {
		  const char * src = GET_SRC_PTR( x1, y );
		  _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 );
	       }
	    }
	 HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
}
#endif
443
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
   (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * Read a horizontal span of BGRA8888 pixels into an RGBA array using the
 * SSE conversion routine.
 *
 * Uses the same HW_READ_LOCK / HW_READ_UNLOCK hooks as the plain C
 * TAG(ReadRGBASpan), since both are installed into the same
 * swdd->ReadRGBASpan slot by TAG(InitPointers).
 *
 * \param ctx   GL context (not referenced directly here).
 * \param n     number of pixels in the span.
 * \param x     column of the first pixel.
 * \param y     row of the span; passed through Y_FLIP before use.
 * \param rgba  destination array.
 */
static void TAG2(ReadRGBASpan,_SSE)( const GLcontext *ctx,
				     GLuint n, GLint x, GLint y,
				     GLubyte rgba[][4])
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
      {
	 GLint x1,n1;
	 LOCAL_VARS;

	 y = Y_FLIP(y);

	 if (DBG) fprintf(stderr, "ReadRGBASpan\n");

	 HW_READ_CLIPLOOP()
	    {
	       GLint i = 0;
	       CLIPSPAN(x,y,n,x1,n1,i);

	       /* Only convert when something survived clipping, matching
		* the `n1 > 0` loop condition of the C version.
		* NOTE(review): the asm routine's handling of a zero or
		* negative count is not visible here, so it is never handed
		* one.
		*/
	       if (n1 > 0) {
		  const char * src = GET_SRC_PTR( x1, y );
		  _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 );
	       }
	    }
	 HW_ENDCLIPLOOP();
      }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   __asm__ __volatile__( "emms" );
#endif
}
#endif
485
486
487 static void TAG(ReadRGBAPixels)( const GLcontext *ctx,
488 GLuint n, const GLint x[], const GLint y[],
489 GLubyte rgba[][4], const GLubyte mask[] )
490 {
491 HW_READ_LOCK()
492 {
493 GLint i;
494 LOCAL_VARS;
495
496 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
497
498 HW_READ_CLIPLOOP()
499 {
500 if (mask)
501 {
502 for (i=0;i<n;i++)
503 if (mask[i]) {
504 int fy = Y_FLIP( y[i] );
505 if (CLIPPIXEL( x[i], fy ))
506 READ_RGBA( rgba[i], x[i], fy );
507 }
508 }
509 else
510 {
511 for (i=0;i<n;i++) {
512 int fy = Y_FLIP( y[i] );
513 if (CLIPPIXEL( x[i], fy ))
514 READ_RGBA( rgba[i], x[i], fy );
515 }
516 }
517 }
518 HW_ENDCLIPLOOP();
519 }
520 HW_READ_UNLOCK();
521 }
522
523 static void TAG(InitPointers)(struct swrast_device_driver *swdd)
524 {
525 swdd->WriteRGBASpan = TAG(WriteRGBASpan);
526 swdd->WriteRGBSpan = TAG(WriteRGBSpan);
527 swdd->WriteMonoRGBASpan = TAG(WriteMonoRGBASpan);
528 swdd->WriteRGBAPixels = TAG(WriteRGBAPixels);
529 swdd->WriteMonoRGBAPixels = TAG(WriteMonoRGBAPixels);
530 swdd->ReadRGBAPixels = TAG(ReadRGBAPixels);
531
532 #if (SPANTMP_PIXEL_FMT == GL_BGRA) && \
533 (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
534 #if defined(USE_SSE_ASM)
535 if ( cpu_has_xmm2 ) {
536 if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "SSE2" );
537 swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _SSE2);
538 }
539 else
540 #endif
541 #if defined(USE_SSE_ASM)
542 if ( cpu_has_xmm ) {
543 if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "SSE" );
544 swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _SSE);
545 }
546 else
547 #endif
548 #if defined(USE_MMX_ASM)
549 if ( cpu_has_mmx ) {
550 if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "MMX" );
551 swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _MMX);
552 }
553 else
554 #endif
555 #endif
556 {
557 if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "C" );
558 swdd->ReadRGBASpan = TAG(ReadRGBASpan);
559 }
560
561 }
562
563
/*
 * Drop the per-instantiation macros once the functions above have been
 * generated.
 */

/* Template parameters (none of these are defined in this file). */
#undef SPANTMP_PIXEL_FMT
#undef SPANTMP_PIXEL_TYPE
#undef TAG
#undef TAG2
#undef GET_SRC_PTR
#undef GET_DST_PTR

/* Pixel helpers defined by this file for the selected format. */
#undef INIT_MONO_PIXEL
#undef WRITE_RGBA
#undef WRITE_PIXEL
#undef READ_RGBA