Merge branch 'mesa_7_5_branch' into mesa_7_6_branch
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
1 /**************************************************************************
2
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
6
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
10
11 All Rights Reserved.
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
20
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
24
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 **************************************************************************/
34
35 /*
36 * Authors:
37 * Kevin E. Martin <martin@valinux.com>
38 * Gareth Hughes <gareth@valinux.com>
39 * Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43 #include "main/glheader.h"
44 #include "swrast/swrast.h"
45
46 #include "radeon_common.h"
47 #include "radeon_lock.h"
48 #include "radeon_span.h"
49
50 #define DBG 0
51
52 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
53
54
55 /* r200 depth buffer is always tiled - this is the formula
56 according to the docs unless I typo'ed in it
57 */
58 #if defined(RADEON_COMMON_FOR_R200)
/* Return the CPU address of the 16-bit depth element at (x, y) in an
 * r200 depth buffer.  With a hardware surface set up (has_surface) the
 * mapping appears linear; otherwise the r200 2-byte depth tiling
 * formula is applied by hand (per the file header note: taken from the
 * docs, possibly typo'ed).
 */
59 static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
60 GLint x, GLint y)
61 {
62 GLubyte *ptr = rrb->bo->ptr;
63 GLint offset;
64 if (rrb->has_surface) {
/* Surface registers do the address translation in hardware: linear layout. */
65 offset = x * rrb->cpp + y * rrb->pitch;
66 } else {
67 GLuint b;
68 offset = 0;
/* b = linear macro-tile index: the x >> 6 / y >> 4 shifts imply 64x16-pixel
 * macro tiles; pitch >> 8 is the per-row tile stride used by the formula
 * (pitch is in bytes — TODO confirm the >> 8 scaling against the docs). */
69 b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6)));
/* Two macro tiles per 4KB page (bit 12 and up). */
70 offset += (b >> 1) << 12;
/* Bit 11 (bank swizzle): plain b parity when the tile pitch is odd,
 * otherwise parity XORed with the tile row. */
71 offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
/* Remaining x/y bits interleaved into the intra-tile offset; the final
 * << 1 accounts for the 2-byte element size. */
72 offset += ((y >> 2) & 0x3) << 9;
73 offset += ((x >> 3) & 0x1) << 8;
74 offset += ((x >> 4) & 0x3) << 6;
75 offset += ((x >> 2) & 0x1) << 5;
76 offset += ((y >> 1) & 0x1) << 4;
77 offset += ((x >> 1) & 0x1) << 3;
78 offset += (y & 0x1) << 2;
79 offset += (x & 0x1) << 1;
80 }
81 return &ptr[offset];
82 }
83
/* Return the CPU address of the 32-bit depth(+stencil) element at (x, y)
 * in an r200 depth buffer.  Same structure as r200_depth_2byte but with
 * the 4-byte tiling formula (32x16-pixel macro tiles, element size 4).
 */
84 static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
85 GLint x, GLint y)
86 {
87 GLubyte *ptr = rrb->bo->ptr;
88 GLint offset;
89 if (rrb->has_surface) {
/* Hardware surface translation active: linear layout. */
90 offset = x * rrb->cpp + y * rrb->pitch;
91 } else {
92 GLuint b;
93 offset = 0;
/* Macro-tile index; x >> 5 / y >> 4 imply 32x16-pixel macro tiles.
 * NOTE(review): this variant masks y with 0x7ff but r200_depth_2byte
 * does not — confirm against the docs whether both should. */
94 b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
/* Two macro tiles per 4KB page. */
95 offset += (b >> 1) << 12;
/* Bit 11 (bank swizzle), analogous to the 2-byte case. */
96 offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
/* Intra-tile bit interleave; the final << 2 is the 4-byte element size. */
97 offset += ((y >> 2) & 0x3) << 9;
98 offset += ((x >> 2) & 0x1) << 8;
99 offset += ((x >> 3) & 0x3) << 6;
100 offset += ((y >> 1) & 0x1) << 5;
101 offset += ((x >> 1) & 0x1) << 4;
102 offset += (y & 0x1) << 3;
103 offset += (x & 0x1) << 2;
104 }
105 return &ptr[offset];
106 }
107 #endif
108
109 /* r600 tiling
110 * two main types:
111 * - 1D (akin to macro-linear/micro-tiled on older asics)
112 * - 2D (akin to macro-tiled/micro-tiled on older asics)
113 * only 1D tiling is implemented below
114 */
115 #if defined(RADEON_COMMON_FOR_R600)
/* Compute the byte offset of element (x, y) inside an r600 1D-tiled
 * buffer.  Tiles are 8x8 pixels, one slice thick, single-sampled (the
 * num_samples / z / sample_number values are fixed below).  is_depth
 * selects the depth/stencil bit-swizzle; when set, is_stencil picks the
 * stencil byte instead of the 3-byte depth value inside a 4-byte
 * depth/stencil tile.
 */
116 static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
117 GLint x, GLint y, GLint is_depth, GLint is_stencil)
118 {
119 GLint element_bytes = rrb->cpp;
/* Fixed parameters for this implementation: one sample, 8x8x1 tiles,
 * slice 0 only. */
120 GLint num_samples = 1;
121 GLint tile_width = 8;
122 GLint tile_height = 8;
123 GLint tile_thickness = 1;
/* rrb->pitch is in bytes (it is divided by the element size here). */
124 GLint pitch_elements = rrb->pitch / element_bytes;
125 GLint height = rrb->base.Height;
126 GLint z = 0;
127 GLint sample_number = 0;
128 /* */
129 GLint tile_bytes;
130 GLint tiles_per_row;
131 GLint tiles_per_slice;
132 GLint slice_offset;
133 GLint tile_row_index;
134 GLint tile_column_index;
135 GLint tile_offset;
136 GLint pixel_number = 0;
137 GLint element_offset;
138 GLint offset = 0;
139
/* Locate the 8x8 tile containing (x, y): slice base first, then the
 * row-major tile index within the slice. */
140 tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
141 tiles_per_row = pitch_elements / tile_width;
142 tiles_per_slice = tiles_per_row * (height / tile_height);
143 slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
144 tile_row_index = y / tile_height;
145 tile_column_index = x / tile_width;
146 tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
147
148 if (is_depth) {
149 GLint pixel_offset = 0;
150
/* Depth/stencil swizzle: x and y bits interleave starting at x[0]. */
151 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
152 pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
153 pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
154 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
155 pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
156 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
157 switch (element_bytes) {
158 case 2:
159 pixel_offset = pixel_number * element_bytes * num_samples;
160 break;
161 case 4:
162 /* stencil and depth data are stored separately within a tile.
163 * stencil is stored in a contiguous tile before the depth tile.
164 * stencil element is 1 byte, depth element is 3 bytes.
165 * stencil tile is 64 bytes.
166 */
167 if (is_stencil)
168 pixel_offset = pixel_number * 1 * num_samples;
169 else
170 pixel_offset = (pixel_number * 3 * num_samples) + 64;
171 break;
172 }
173 element_offset = pixel_offset + (sample_number * element_bytes);
174 } else {
175 GLint sample_offset;
176
/* Color swizzle: the x/y bit interleave depends on the element size. */
177 switch (element_bytes) {
178 case 1:
179 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
180 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
181 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
182 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
183 pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
184 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
185 break;
186 case 2:
187 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
188 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
189 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
190 pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
191 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
192 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
193 break;
194 case 4:
195 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
196 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
197 pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
198 pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
199 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
200 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
201 break;
202 }
203 sample_offset = sample_number * (tile_bytes / num_samples);
204 element_offset = sample_offset + (pixel_number * element_bytes);
205 }
206 offset = slice_offset + tile_offset + element_offset;
207 return offset;
208 }
209
210 /* depth buffers */
211 static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
212 GLint x, GLint y)
213 {
214 GLubyte *ptr = rrb->bo->ptr;
215 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
216 return &ptr[offset];
217 }
218
219 static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
220 GLint x, GLint y)
221 {
222 GLubyte *ptr = rrb->bo->ptr;
223 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
224 return &ptr[offset];
225 }
226
227 static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
228 GLint x, GLint y)
229 {
230 GLubyte *ptr = rrb->bo->ptr;
231 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
232 GLint offset;
233
234 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
235 offset = x * rrb->cpp + y * rrb->pitch;
236 } else {
237 offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
238 }
239 return &ptr[offset];
240 }
241
242 #else
243
244 /* radeon tiling on r300-r500 has 4 states,
245 macro-linear/micro-linear
246 macro-linear/micro-tiled
247 macro-tiled /micro-linear
248 macro-tiled /micro-tiled
249 1 byte surface
250 2 byte surface - two types - we only provide 8x2 microtiling
251 4 byte surface
252 8/16 byte (unused)
253 */
/* Return the CPU address of a 4-byte element at (x, y) on r100-r500
 * parts.  Falls back to linear addressing when a hardware surface is
 * active or the BO is untiled; otherwise applies one of the three tiled
 * layouts (macro+micro, macro-only, micro-only) selected by BO flags.
 */
254 static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
255 GLint x, GLint y)
256 {
257 GLubyte *ptr = rrb->bo->ptr;
258 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
259 GLint offset;
260
261 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
/* Linear: surface registers translate, or the BO is simply untiled. */
262 offset = x * rrb->cpp + y * rrb->pitch;
263 } else {
264 offset = 0;
265 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
266 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
/* Macro + micro tiled: 2KB macro tiles (32x16 pixels, per the
 * x >> 5 / y >> 4 shifts), bits 7-10 XOR-swizzled, micro-tile
 * bits below; final << 2 is the 4-byte element size. */
267 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
268 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
269 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
270 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
271 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
272 offset += ((y >> 1) & 0x1) << 6;
273 offset += ((x >> 2) & 0x1) << 5;
274 offset += (y & 1) << 4;
275 offset += (x & 3) << 2;
276 } else {
/* Macro tiled only: 2KB macro tiles of 64x8 pixels (x >> 6,
 * y >> 3), rows linear inside the tile. */
277 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
278 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
279 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
280 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
281 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
282 offset += (y & 1) << 6;
283 offset += (x & 15) << 2;
284 }
285 } else {
/* Micro tiled only: 32-byte micro tiles of 4x2 pixels. */
286 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
287 offset += (y & 1) << 4;
288 offset += (x & 3) << 2;
289 }
290 }
291 return &ptr[offset];
292 }
293
/* Return the CPU address of a 2-byte element at (x, y) on r100-r500
 * parts, using the 8x2 micro-tile variant (see the layout comment
 * above: only one of the two 2-byte micro-tile modes is provided).
 * Structure mirrors radeon_ptr_4byte with 2-byte element scaling.
 */
294 static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
295 GLint x, GLint y)
296 {
297 GLubyte *ptr = rrb->bo->ptr;
298 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
299 GLint offset;
300
301 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
/* Linear: surface registers translate, or the BO is untiled. */
302 offset = x * rrb->cpp + y * rrb->pitch;
303 } else {
304 offset = 0;
305 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
306 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
/* Macro + micro tiled: 2KB macro tiles of 64x16 pixels
 * (x >> 6, y >> 4 with 2-byte elements), XOR-swizzled middle
 * bits, 8x2 micro-tile bits below. */
307 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
308 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
309 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
310 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
311 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
312 offset += ((y >> 1) & 0x1) << 6;
313 offset += ((x >> 3) & 0x1) << 5;
314 offset += (y & 1) << 4;
315 offset += (x & 3) << 2;
316 } else {
/* Macro tiled only: 2KB macro tiles of 128x8 pixels. */
317 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
318 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
319 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
320 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
321 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
322 offset += (y & 1) << 6;
323 offset += ((x >> 4) & 0x1) << 5;
324 offset += (x & 15) << 2;
325 }
326 } else {
/* Micro tiled only: 32-byte micro tiles of 8x2 2-byte elements. */
327 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
328 offset += (y & 0x1) << 4;
329 offset += (x & 0x7) << 1;
330 }
331 }
332 return &ptr[offset];
333 }
334
335 #endif
336
337 #ifndef COMPILE_R300
338 #ifndef COMPILE_R600
/* Convert a Z24S8 packed depth/stencil value (stencil in the low byte)
 * to S8Z24 layout (stencil in the high byte): a 32-bit rotate right
 * by 8 bits. */
static uint32_t
z24s8_to_s8z24(uint32_t zs)
{
	return (zs >> 8) | (zs << 24);
}
344
/* Convert an S8Z24 packed depth/stencil value (stencil in the high
 * byte) back to Z24S8 layout (stencil in the low byte): a 32-bit
 * rotate left by 8 bits — the inverse of z24s8_to_s8z24. */
static uint32_t
s8z24_to_z24s8(uint32_t sz)
{
	return (sz << 8) | (sz >> 24);
}
350 #endif
351 #endif
352
353 /*
354 * Note that all information needed to access pixels in a renderbuffer
355 * should be obtained through the gl_renderbuffer parameter, not per-context
356 * information.
357 */
/* Per-span-function locals used by the spantmp2.h/depthtmp.h templates:
 * the renderbuffer, y-flip parameters, and the cliprect list fetched
 * from the context rather than the drawable (see the XXX note below). */
358 #define LOCAL_VARS \
359 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
360 struct radeon_renderbuffer *rrb = (void *) rb; \
361 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
362 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
363 unsigned int num_cliprects; \
364 struct drm_clip_rect *cliprects; \
365 int x_off, y_off; \
366 GLuint p; \
367 (void)p; \
368 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
369
/* Same as LOCAL_VARS minus the scratch pixel variable 'p'. */
370 #define LOCAL_DEPTH_VARS \
371 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
372 struct radeon_renderbuffer *rrb = (void *) rb; \
373 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
374 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
375 unsigned int num_cliprects; \
376 struct drm_clip_rect *cliprects; \
377 int x_off, y_off; \
378 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
379
380 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
381
/* User FBOs (DrawBuffer->Name != 0) are y-up (scale 1, bias 0); window
 * system buffers are flipped (scale -1, bias Height - 1). */
382 #define Y_FLIP(_y) ((_y) * yScale + yBias)
383
/* No-ops here: hardware locking is handled once per span-render session
 * in radeonSpanRenderStart/Finish rather than per template function. */
384 #define HW_LOCK()
385
386 #define HW_UNLOCK()
387
388 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
389 * the cliprect info from the context, not the driDrawable.
390 * Move this into spantmp2.h someday.
391 */
392 #define HW_CLIPLOOP() \
393 do { \
394 int _nc = num_cliprects; \
395 while ( _nc-- ) { \
396 int minx = cliprects[_nc].x1 - x_off; \
397 int miny = cliprects[_nc].y1 - y_off; \
398 int maxx = cliprects[_nc].x2 - x_off; \
399 int maxy = cliprects[_nc].y2 - y_off;
400
401 /* ================================================================
402 * Color buffer
403 */
404
/* Each section below parameterizes and includes the spantmp2.h template,
 * generating span/pixel accessors named via TAG/TAG2 for one pixel
 * format (the template resets the parameter macros — see spantmp2.h). */
405 /* 16 bit, RGB565 color spanline and pixel functions
406 */
407 #define SPANTMP_PIXEL_FMT GL_RGB
408 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
409
410 #define TAG(x) radeon##x##_RGB565
411 #define TAG2(x,y) radeon##x##_RGB565##y
412 #if defined(RADEON_COMMON_FOR_R600)
413 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
414 #else
415 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
416 #endif
417 #include "spantmp2.h"
418
419 /* 16 bit, ARGB1555 color spanline and pixel functions
420 */
421 #define SPANTMP_PIXEL_FMT GL_BGRA
422 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
423
424 #define TAG(x) radeon##x##_ARGB1555
425 #define TAG2(x,y) radeon##x##_ARGB1555##y
426 #if defined(RADEON_COMMON_FOR_R600)
427 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
428 #else
429 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
430 #endif
431 #include "spantmp2.h"
432
433 /* 16 bit, RGBA4 color spanline and pixel functions
434 */
435 #define SPANTMP_PIXEL_FMT GL_BGRA
436 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
437
438 #define TAG(x) radeon##x##_ARGB4444
439 #define TAG2(x,y) radeon##x##_ARGB4444##y
440 #if defined(RADEON_COMMON_FOR_R600)
441 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
442 #else
443 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
444 #endif
445 #include "spantmp2.h"
446
/* xRGB: reads force the alpha byte to 0xff since the buffer's alpha
 * channel is unused.
 * NOTE(review): the PUT_VALUE macros below open with a bare '{' but
 * close with '} while (0)' — this only parses because the template
 * invokes them followed by a semicolon, and it would break under an
 * if/else; consider the standard do { ... } while (0). */
447 /* 32 bit, xRGB8888 color spanline and pixel functions
448 */
449 #define SPANTMP_PIXEL_FMT GL_BGRA
450 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
451
452 #define TAG(x) radeon##x##_xRGB8888
453 #define TAG2(x,y) radeon##x##_xRGB8888##y
454 #if defined(RADEON_COMMON_FOR_R600)
455 #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
456 #define PUT_VALUE(_x, _y, d) { \
457 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
458 *_ptr = d; \
459 } while (0)
460 #else
461 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
462 #define PUT_VALUE(_x, _y, d) { \
463 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
464 *_ptr = d; \
465 } while (0)
466 #endif
467 #include "spantmp2.h"
468
469 /* 32 bit, ARGB8888 color spanline and pixel functions
470 */
471 #define SPANTMP_PIXEL_FMT GL_BGRA
472 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
473
474 #define TAG(x) radeon##x##_ARGB8888
475 #define TAG2(x,y) radeon##x##_ARGB8888##y
476 #if defined(RADEON_COMMON_FOR_R600)
477 #define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
478 #define PUT_VALUE(_x, _y, d) { \
479 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
480 *_ptr = d; \
481 } while (0)
482 #else
483 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
484 #define PUT_VALUE(_x, _y, d) { \
485 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
486 *_ptr = d; \
487 } while (0)
488 #endif
489 #include "spantmp2.h"
490
491 /* ================================================================
492 * Depth buffer
493 */
494
495 /* The Radeon family has depth tiling on all the time, so we have to convert
496 * the x,y coordinates into the memory bus address (mba) in the same
497 * manner as the engine. In each case, the linear block address (ba)
498 * is calculated, and then wired with x and y to produce the final
499 * memory address.
500 * The chip will do address translation on its own if the surface registers
501 * are set up correctly. It is not quite enough to get it working with hyperz
502 * too...
503 */
504
/* Each READ/WRITE pair selects the per-asic tiled pointer helper, then
 * depthtmp.h instantiates the span functions named via TAG. */
505 /* 16-bit depth buffer functions
506 */
507 #define VALUE_TYPE GLushort
508
509 #if defined(RADEON_COMMON_FOR_R200)
510 #define WRITE_DEPTH( _x, _y, d ) \
511 *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
512 #elif defined(RADEON_COMMON_FOR_R600)
513 #define WRITE_DEPTH( _x, _y, d ) \
514 *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
515 #else
516 #define WRITE_DEPTH( _x, _y, d ) \
517 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
518 #endif
519
520 #if defined(RADEON_COMMON_FOR_R200)
521 #define READ_DEPTH( d, _x, _y ) \
522 d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
523 #elif defined(RADEON_COMMON_FOR_R600)
524 #define READ_DEPTH( d, _x, _y ) \
525 d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
526 #else
527 #define READ_DEPTH( d, _x, _y ) \
528 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
529 #endif
530
531 #define TAG(x) radeon##x##_z16
532 #include "depthtmp.h"
533
534 /* 24 bit depth
535 *
536 * Careful: It looks like the R300 uses ZZZS byte order while the R200
537 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
538 */
539 #define VALUE_TYPE GLuint
540
/* R300 keeps depth in bits 31..8 (low byte preserved for stencil);
 * the other asics keep depth in the low 24 bits. */
541 #if defined(COMPILE_R300)
542 #define WRITE_DEPTH( _x, _y, d ) \
543 do { \
544 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
545 GLuint tmp = *_ptr; \
546 tmp &= 0x000000ff; \
547 tmp |= ((d << 8) & 0xffffff00); \
548 *_ptr = tmp; \
549 } while (0)
550 #elif defined(RADEON_COMMON_FOR_R600)
551 #define WRITE_DEPTH( _x, _y, d ) \
552 do { \
553 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
554 GLuint tmp = *_ptr; \
555 tmp &= 0xff000000; \
556 tmp |= ((d) & 0x00ffffff); \
557 *_ptr = tmp; \
558 } while (0)
559 #elif defined(RADEON_COMMON_FOR_R200)
560 #define WRITE_DEPTH( _x, _y, d ) \
561 do { \
562 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
563 GLuint tmp = *_ptr; \
564 tmp &= 0xff000000; \
565 tmp |= ((d) & 0x00ffffff); \
566 *_ptr = tmp; \
567 } while (0)
568 #else
569 #define WRITE_DEPTH( _x, _y, d ) \
570 do { \
571 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
572 GLuint tmp = *_ptr; \
573 tmp &= 0xff000000; \
574 tmp |= ((d) & 0x00ffffff); \
575 *_ptr = tmp; \
576 } while (0)
577 #endif
578
579 #if defined(COMPILE_R300)
580 #define READ_DEPTH( d, _x, _y ) \
581 do { \
582 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
583 }while(0)
584 #elif defined(RADEON_COMMON_FOR_R600)
585 #define READ_DEPTH( d, _x, _y ) \
586 do { \
587 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
588 }while(0)
589 #elif defined(RADEON_COMMON_FOR_R200)
590 #define READ_DEPTH( d, _x, _y ) \
591 do { \
592 d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
593 }while(0)
594 #else
595 #define READ_DEPTH( d, _x, _y ) \
596 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
597 #endif
598
599 #define TAG(x) radeon##x##_z24
600 #include "depthtmp.h"
601
602 /* 24 bit depth, 8 bit stencil depthbuffer functions
603 * EXT_depth_stencil
604 *
605 * Careful: It looks like the R300 uses ZZZS byte order while the R200
606 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
607 */
608 #define VALUE_TYPE GLuint
609
/* The packed Z24S8 value is stored verbatim on R300, byte-rotated to
 * SZZZ on R200/default, and split across separate depth and stencil
 * tile locations on R600. */
610 #if defined(COMPILE_R300)
611 #define WRITE_DEPTH( _x, _y, d ) \
612 do { \
613 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
614 *_ptr = d; \
615 } while (0)
616 #elif defined(RADEON_COMMON_FOR_R600)
617 #define WRITE_DEPTH( _x, _y, d ) \
618 do { \
619 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
620 GLuint tmp = *_ptr; \
621 tmp &= 0xff000000; \
622 tmp |= (((d) >> 8) & 0x00ffffff); \
623 *_ptr = tmp; \
624 _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
625 tmp = *_ptr; \
626 tmp &= 0xffffff00; \
627 tmp |= (d) & 0xff; \
628 *_ptr = tmp; \
629 } while (0)
630 #elif defined(RADEON_COMMON_FOR_R200)
631 #define WRITE_DEPTH( _x, _y, d ) \
632 do { \
633 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
634 GLuint tmp = z24s8_to_s8z24(d); \
635 *_ptr = tmp; \
636 } while (0)
637 #else
638 #define WRITE_DEPTH( _x, _y, d ) \
639 do { \
640 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
641 GLuint tmp = z24s8_to_s8z24(d); \
642 *_ptr = tmp; \
643 } while (0)
644 #endif
645
646 #if defined(COMPILE_R300)
647 #define READ_DEPTH( d, _x, _y ) \
648 do { \
649 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
650 }while(0)
651 #elif defined(RADEON_COMMON_FOR_R600)
652 #define READ_DEPTH( d, _x, _y ) \
653 do { \
654 d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
655 d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
656 }while(0)
657 #elif defined(RADEON_COMMON_FOR_R200)
658 #define READ_DEPTH( d, _x, _y ) \
659 do { \
660 d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
661 }while(0)
662 #else
663 #define READ_DEPTH( d, _x, _y ) do { \
664 d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \
665 } while (0)
666 #endif
667
668 #define TAG(x) radeon##x##_z24_s8
669 #include "depthtmp.h"
670
671 /* ================================================================
672 * Stencil buffer
673 */
674
/* Stencil accessors for the packed depth/stencil buffer: the stencil
 * byte is the low byte on R300, a dedicated tile location on R600, and
 * the high byte on R200/default (matching the ZZZS/SZZZ orders noted
 * in the depth section).  stenciltmp.h instantiates the functions. */
675 /* 24 bit depth, 8 bit stencil depthbuffer functions
676 */
677 #ifdef COMPILE_R300
678 #define WRITE_STENCIL( _x, _y, d ) \
679 do { \
680 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
681 GLuint tmp = *_ptr; \
682 tmp &= 0xffffff00; \
683 tmp |= (d) & 0xff; \
684 *_ptr = tmp; \
685 } while (0)
686 #elif defined(RADEON_COMMON_FOR_R600)
687 #define WRITE_STENCIL( _x, _y, d ) \
688 do { \
689 GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
690 GLuint tmp = *_ptr; \
691 tmp &= 0xffffff00; \
692 tmp |= (d) & 0xff; \
693 *_ptr = tmp; \
694 } while (0)
695 #elif defined(RADEON_COMMON_FOR_R200)
696 #define WRITE_STENCIL( _x, _y, d ) \
697 do { \
698 GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
699 GLuint tmp = *_ptr; \
700 tmp &= 0x00ffffff; \
701 tmp |= (((d) & 0xff) << 24); \
702 *_ptr = tmp; \
703 } while (0)
704 #else
705 #define WRITE_STENCIL( _x, _y, d ) \
706 do { \
707 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
708 GLuint tmp = *_ptr; \
709 tmp &= 0x00ffffff; \
710 tmp |= (((d) & 0xff) << 24); \
711 *_ptr = tmp; \
712 } while (0)
713 #endif
714
715 #ifdef COMPILE_R300
716 #define READ_STENCIL( d, _x, _y ) \
717 do { \
718 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
719 GLuint tmp = *_ptr; \
720 d = tmp & 0x000000ff; \
721 } while (0)
722 #elif defined(RADEON_COMMON_FOR_R600)
723 #define READ_STENCIL( d, _x, _y ) \
724 do { \
725 GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
726 GLuint tmp = *_ptr; \
727 d = tmp & 0x000000ff; \
728 } while (0)
729 #elif defined(RADEON_COMMON_FOR_R200)
730 #define READ_STENCIL( d, _x, _y ) \
731 do { \
732 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
733 GLuint tmp = *_ptr; \
734 d = (tmp & 0xff000000) >> 24; \
735 } while (0)
736 #else
737 #define READ_STENCIL( d, _x, _y ) \
738 do { \
739 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
740 GLuint tmp = *_ptr; \
741 d = (tmp & 0xff000000) >> 24; \
742 } while (0)
743 #endif
744
745 #define TAG(x) radeon##x##_z24_s8
746 #include "stenciltmp.h"
747
748
749 static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
750 {
751 struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
752 int r;
753
754 if (rrb == NULL || !rrb->bo)
755 return;
756
757 if (flag) {
758 if (rrb->bo->bom->funcs->bo_wait)
759 radeon_bo_wait(rrb->bo);
760 r = radeon_bo_map(rrb->bo, 1);
761 if (r) {
762 fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
763 __FUNCTION__, r);
764 }
765
766 radeonSetSpanFunctions(rrb);
767 } else {
768 radeon_bo_unmap(rrb->bo);
769 rb->GetRow = NULL;
770 rb->PutRow = NULL;
771 }
772 }
773
/* Map (map = GL_TRUE) or unmap all buffers software rendering may
 * touch: the color draw buffers, any render-to-texture attachments,
 * the color read buffer, and the wrapped depth/stencil buffers.
 * NOTE(review): the read color buffer may coincide with a draw buffer,
 * in which case map_unmap_rb is called twice on the same BO —
 * presumably radeon_bo_map/unmap tolerate nesting; verify. */
774 static void
775 radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
776 {
777 GLuint i, j;
778
779 /* color draw buffers */
780 for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
781 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
782
783 /* check for render to textures */
784 for (i = 0; i < BUFFER_COUNT; i++) {
785 struct gl_renderbuffer_attachment *att =
786 ctx->DrawBuffer->Attachment + i;
787 struct gl_texture_object *tex = att->Texture;
788 if (tex) {
789 /* Render to texture. Note that a mipmapped texture need not
790 * be complete for render to texture, so we must restrict to
791 * mapping only the attached image.
792 */
793 radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
794 ASSERT(att->Renderbuffer);
795
796 if (map)
797 radeon_teximage_map(image, GL_TRUE);
798 else
799 radeon_teximage_unmap(image);
800 }
801 }
802
/* Read buffer may differ from the draw buffers (e.g. glReadPixels). */
803 map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
804
805 /* depth buffer (Note wrapper!) */
806 if (ctx->DrawBuffer->_DepthBuffer)
807 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
808
809 if (ctx->DrawBuffer->_StencilBuffer)
810 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
811 }
812
/* swrast SpanRenderStart hook: flush queued vertices, take the hardware
 * lock in classic (non-DRI2) mode, then map every texture and buffer
 * software rendering might touch.  Paired with radeonSpanRenderFinish. */
813 static void radeonSpanRenderStart(GLcontext * ctx)
814 {
815 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
816 int i;
817
/* Submit any pending hardware rendering before the CPU reads/writes. */
818 radeon_firevertices(rmesa);
819
820 /* The locking and wait for idle should really only be needed in classic mode.
821 * In a future memory manager based implementation, this should become
822 * unnecessary due to the fact that mapping our buffers, textures, etc.
823 * should implicitly wait for any previous rendering commands that must
824 * be waited on. */
825 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
826 LOCK_HARDWARE(rmesa);
827 radeonWaitForIdleLocked(rmesa);
828 }
829
/* Map every texture that is enabled on some unit. */
830 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
831 if (ctx->Texture.Unit[i]._ReallyEnabled)
832 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
833 }
834
835 radeon_map_unmap_buffers(ctx, 1);
836 }
837
/* swrast SpanRenderFinish hook: flush software rendering, release the
 * hardware lock taken by radeonSpanRenderStart (classic mode only),
 * and unmap the textures and buffers mapped there. */
838 static void radeonSpanRenderFinish(GLcontext * ctx)
839 {
840 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
841 int i;
/* Make sure all span writes hit the mapped buffers before unmapping. */
842 _swrast_flush(ctx);
843 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
844 UNLOCK_HARDWARE(rmesa);
845 }
846 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
847 if (ctx->Texture.Unit[i]._ReallyEnabled)
848 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
849 }
850
851 radeon_map_unmap_buffers(ctx, 0);
852 }
853
854 void radeonInitSpanFuncs(GLcontext * ctx)
855 {
856 struct swrast_device_driver *swdd =
857 _swrast_GetDeviceDriverReference(ctx);
858 swdd->SpanRenderStart = radeonSpanRenderStart;
859 swdd->SpanRenderFinish = radeonSpanRenderFinish;
860 }
861
862 /**
863 * Plug in the Get/Put routines for the given driRenderbuffer.
864 */
865 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
866 {
867 if (rrb->base._ActualFormat == GL_RGB5) {
868 radeonInitPointers_RGB565(&rrb->base);
869 } else if (rrb->base._ActualFormat == GL_RGB8) {
870 radeonInitPointers_xRGB8888(&rrb->base);
871 } else if (rrb->base._ActualFormat == GL_RGBA8) {
872 radeonInitPointers_ARGB8888(&rrb->base);
873 } else if (rrb->base._ActualFormat == GL_RGBA4) {
874 radeonInitPointers_ARGB4444(&rrb->base);
875 } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
876 radeonInitPointers_ARGB1555(&rrb->base);
877 } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
878 radeonInitDepthPointers_z16(&rrb->base);
879 } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
880 radeonInitDepthPointers_z24(&rrb->base);
881 } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
882 radeonInitDepthPointers_z24_s8(&rrb->base);
883 } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
884 radeonInitStencilPointers_z24_s8(&rrb->base);
885 } else {
886 fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
887 }
888 }