radeon: get rid of z24s8 <-> s8z24 conversions in span code
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
1 /**************************************************************************
2
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
6
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
10
11 All Rights Reserved.
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
20
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
24
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 **************************************************************************/
34
35 /*
36 * Authors:
37 * Kevin E. Martin <martin@valinux.com>
38 * Gareth Hughes <gareth@valinux.com>
39 * Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43 #include "main/glheader.h"
44 #include "swrast/swrast.h"
45
46 #include "radeon_common.h"
47 #include "radeon_lock.h"
48 #include "radeon_span.h"
49
50 #define DBG 0
51
52 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
53
54
55 /* r200 depth buffer is always tiled - this is the formula
56 according to the docs unless I typo'ed in it
57 */
58 #if defined(RADEON_R200)
59 static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
60 GLint x, GLint y)
61 {
62 GLubyte *ptr = rrb->bo->ptr;
63 GLint offset;
64 if (rrb->has_surface) {
65 offset = x * rrb->cpp + y * rrb->pitch;
66 } else {
67 GLuint b;
68 offset = 0;
69 b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6)));
70 offset += (b >> 1) << 12;
71 offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
72 offset += ((y >> 2) & 0x3) << 9;
73 offset += ((x >> 3) & 0x1) << 8;
74 offset += ((x >> 4) & 0x3) << 6;
75 offset += ((x >> 2) & 0x1) << 5;
76 offset += ((y >> 1) & 0x1) << 4;
77 offset += ((x >> 1) & 0x1) << 3;
78 offset += (y & 0x1) << 2;
79 offset += (x & 0x1) << 1;
80 }
81 return &ptr[offset];
82 }
83
84 static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
85 GLint x, GLint y)
86 {
87 GLubyte *ptr = rrb->bo->ptr;
88 GLint offset;
89 if (rrb->has_surface) {
90 offset = x * rrb->cpp + y * rrb->pitch;
91 } else {
92 GLuint b;
93 offset = 0;
94 b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
95 offset += (b >> 1) << 12;
96 offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
97 offset += ((y >> 2) & 0x3) << 9;
98 offset += ((x >> 2) & 0x1) << 8;
99 offset += ((x >> 3) & 0x3) << 6;
100 offset += ((y >> 1) & 0x1) << 5;
101 offset += ((x >> 1) & 0x1) << 4;
102 offset += (y & 0x1) << 3;
103 offset += (x & 0x1) << 2;
104 }
105 return &ptr[offset];
106 }
107 #endif
108
109 /* r600 tiling
110 * two main types:
111 * - 1D (akin to macro-linear/micro-tiled on older asics)
112 * - 2D (akin to macro-tiled/micro-tiled on older asics)
113 * only 1D tiling is implemented below
114 */
115 #if defined(RADEON_R600)
116 static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
117 GLint x, GLint y, GLint is_depth, GLint is_stencil)
118 {
119 GLint element_bytes = rrb->cpp;
120 GLint num_samples = 1;
121 GLint tile_width = 8;
122 GLint tile_height = 8;
123 GLint tile_thickness = 1;
124 GLint pitch_elements = rrb->pitch / element_bytes;
125 GLint height = rrb->base.Height;
126 GLint z = 0;
127 GLint sample_number = 0;
128 /* */
129 GLint tile_bytes;
130 GLint tiles_per_row;
131 GLint tiles_per_slice;
132 GLint slice_offset;
133 GLint tile_row_index;
134 GLint tile_column_index;
135 GLint tile_offset;
136 GLint pixel_number = 0;
137 GLint element_offset;
138 GLint offset = 0;
139
140 tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
141 tiles_per_row = pitch_elements / tile_width;
142 tiles_per_slice = tiles_per_row * (height / tile_height);
143 slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
144 tile_row_index = y / tile_height;
145 tile_column_index = x / tile_width;
146 tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
147
148 if (is_depth) {
149 GLint pixel_offset = 0;
150
151 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
152 pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
153 pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
154 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
155 pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
156 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
157 switch (element_bytes) {
158 case 2:
159 pixel_offset = pixel_number * element_bytes * num_samples;
160 break;
161 case 4:
162 /* stencil and depth data are stored separately within a tile.
163 * stencil is stored in a contiguous tile before the depth tile.
164 * stencil element is 1 byte, depth element is 3 bytes.
165 * stencil tile is 64 bytes.
166 */
167 if (is_stencil)
168 pixel_offset = pixel_number * 1 * num_samples;
169 else
170 pixel_offset = (pixel_number * 3 * num_samples) + 64;
171 break;
172 }
173 element_offset = pixel_offset + (sample_number * element_bytes);
174 } else {
175 GLint sample_offset;
176
177 switch (element_bytes) {
178 case 1:
179 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
180 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
181 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
182 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
183 pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
184 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
185 break;
186 case 2:
187 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
188 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
189 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
190 pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
191 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
192 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
193 break;
194 case 4:
195 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
196 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
197 pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
198 pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
199 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
200 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
201 break;
202 }
203 sample_offset = sample_number * (tile_bytes / num_samples);
204 element_offset = sample_offset + (pixel_number * element_bytes);
205 }
206 offset = slice_offset + tile_offset + element_offset;
207 return offset;
208 }
209
210 /* depth buffers */
211 static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
212 GLint x, GLint y)
213 {
214 GLubyte *ptr = rrb->bo->ptr;
215 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
216 return &ptr[offset];
217 }
218
219 static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
220 GLint x, GLint y)
221 {
222 GLubyte *ptr = rrb->bo->ptr;
223 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
224 return &ptr[offset];
225 }
226
227 static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
228 GLint x, GLint y)
229 {
230 GLubyte *ptr = rrb->bo->ptr;
231 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
232 GLint offset;
233
234 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
235 offset = x * rrb->cpp + y * rrb->pitch;
236 } else {
237 offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
238 }
239 return &ptr[offset];
240 }
241
242 #else
243
244 /* radeon tiling on r300-r500 has 4 states,
245 macro-linear/micro-linear
246 macro-linear/micro-tiled
247 macro-tiled /micro-linear
248 macro-tiled /micro-tiled
249 1 byte surface
250 2 byte surface - two types - we only provide 8x2 microtiling
251 4 byte surface
252 8/16 byte (unused)
253 */
254 static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
255 GLint x, GLint y)
256 {
257 GLubyte *ptr = rrb->bo->ptr;
258 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
259 GLint offset;
260
261 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
262 offset = x * rrb->cpp + y * rrb->pitch;
263 } else {
264 offset = 0;
265 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
266 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
267 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
268 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
269 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
270 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
271 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
272 offset += ((y >> 1) & 0x1) << 6;
273 offset += ((x >> 2) & 0x1) << 5;
274 offset += (y & 1) << 4;
275 offset += (x & 3) << 2;
276 } else {
277 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
278 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
279 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
280 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
281 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
282 offset += (y & 1) << 6;
283 offset += (x & 15) << 2;
284 }
285 } else {
286 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
287 offset += (y & 1) << 4;
288 offset += (x & 3) << 2;
289 }
290 }
291 return &ptr[offset];
292 }
293
294 static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
295 GLint x, GLint y)
296 {
297 GLubyte *ptr = rrb->bo->ptr;
298 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
299 GLint offset;
300
301 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
302 offset = x * rrb->cpp + y * rrb->pitch;
303 } else {
304 offset = 0;
305 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
306 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
307 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
308 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
309 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
310 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
311 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
312 offset += ((y >> 1) & 0x1) << 6;
313 offset += ((x >> 3) & 0x1) << 5;
314 offset += (y & 1) << 4;
315 offset += (x & 3) << 2;
316 } else {
317 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
318 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
319 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
320 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
321 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
322 offset += (y & 1) << 6;
323 offset += ((x >> 4) & 0x1) << 5;
324 offset += (x & 15) << 2;
325 }
326 } else {
327 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
328 offset += (y & 0x1) << 4;
329 offset += (x & 0x7) << 1;
330 }
331 }
332 return &ptr[offset];
333 }
334
335 #endif
336
337 /*
338 * Note that all information needed to access pixels in a renderbuffer
339 * should be obtained through the gl_renderbuffer parameter, not per-context
340 * information.
341 */
/*
 * LOCAL_VARS: declarations expanded at the top of each colour span function.
 * It expects `ctx` and `rb` to be in scope, and provides `radeon`, `rrb`,
 * the Y-flip factors, the cliprect list with its x/y offsets, and a scratch
 * pixel `p`.  yScale/yBias leave y unchanged when ctx->DrawBuffer->Name is
 * non-zero and otherwise mirror it around rrb->base.Height - 1.
 */
342 #define LOCAL_VARS \
343 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
344 struct radeon_renderbuffer *rrb = (void *) rb; \
345 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
346 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
347 unsigned int num_cliprects; \
348 struct drm_clip_rect *cliprects; \
349 int x_off, y_off; \
350 GLuint p; \
351 (void)p; \
352 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
353
/* LOCAL_DEPTH_VARS: same as LOCAL_VARS but without the scratch pixel `p`. */
354 #define LOCAL_DEPTH_VARS \
355 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
356 struct radeon_renderbuffer *rrb = (void *) rb; \
357 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
358 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
359 unsigned int num_cliprects; \
360 struct drm_clip_rect *cliprects; \
361 int x_off, y_off; \
362 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
363
364 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
365
/* Apply the yScale/yBias pair declared by the LOCAL_*_VARS macros. */
366 #define Y_FLIP(_y) ((_y) * yScale + yBias)
367
/* Both lock hooks expand to nothing in this file. */
368 #define HW_LOCK()
369
370 #define HW_UNLOCK()
371
372 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
373 * the cliprect info from the context, not the driDrawable.
374 * Move this into spantmp2.h someday.
375 */
/*
 * HW_CLIPLOOP opens a `do {` and a `while (_nc--) {` that it does not close,
 * and defines minx/miny/maxx/maxy for the current cliprect relative to
 * x_off/y_off.  NOTE(review): the matching close is not in this file;
 * presumably the template headers supply it -- confirm.
 */
376 #define HW_CLIPLOOP() \
377 do { \
378 int _nc = num_cliprects; \
379 while ( _nc-- ) { \
380 int minx = cliprects[_nc].x1 - x_off; \
381 int miny = cliprects[_nc].y1 - y_off; \
382 int maxx = cliprects[_nc].x2 - x_off; \
383 int maxy = cliprects[_nc].y2 - y_off;
384
385 /* ================================================================
386 * Color buffer
387 */
388
389 /* 16 bit, RGB565 color spanline and pixel functions
390 */
/*
 * Each section below sets the parameters for one inclusion of spantmp2.h:
 * the pixel format/type, TAG/TAG2 (which paste the format suffix into the
 * names built from them) and GET_PTR (which picks the per-chip address
 * function and adds the cliprect offsets).
 * NOTE(review): spantmp2.h is presumably what generates the
 * radeonInitPointers_<fmt> functions called later in this file -- confirm.
 */
391 #define SPANTMP_PIXEL_FMT GL_RGB
392 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
393
394 #define TAG(x) radeon##x##_RGB565
395 #define TAG2(x,y) radeon##x##_RGB565##y
396 #if defined(RADEON_R600)
397 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
398 #else
399 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
400 #endif
401 #include "spantmp2.h"
402
403 /* 16 bit, ARGB1555 color spanline and pixel functions
404 */
405 #define SPANTMP_PIXEL_FMT GL_BGRA
406 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
407
408 #define TAG(x) radeon##x##_ARGB1555
409 #define TAG2(x,y) radeon##x##_ARGB1555##y
410 #if defined(RADEON_R600)
411 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
412 #else
413 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
414 #endif
415 #include "spantmp2.h"
416
417 /* 16 bit, ARGB4444 color spanline and pixel functions
418 */
419 #define SPANTMP_PIXEL_FMT GL_BGRA
420 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
421
422 #define TAG(x) radeon##x##_ARGB4444
423 #define TAG2(x,y) radeon##x##_ARGB4444##y
424 #if defined(RADEON_R600)
425 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
426 #else
427 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
428 #endif
429 #include "spantmp2.h"
430
431 /* 32 bit, xRGB8888 color spanline and pixel functions
432 */
433 #define SPANTMP_PIXEL_FMT GL_BGRA
434 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
435
436 #define TAG(x) radeon##x##_xRGB8888
437 #define TAG2(x,y) radeon##x##_xRGB8888##y
438 #if defined(RADEON_R600)
439 #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
440 #define PUT_VALUE(_x, _y, d) { \
441 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
442 *_ptr = d; \
443 } while (0)
444 #else
445 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
446 #define PUT_VALUE(_x, _y, d) { \
447 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
448 *_ptr = d; \
449 } while (0)
450 #endif
451 #include "spantmp2.h"
452
453 /* 32 bit, ARGB8888 color spanline and pixel functions
454 */
455 #define SPANTMP_PIXEL_FMT GL_BGRA
456 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
457
458 #define TAG(x) radeon##x##_ARGB8888
459 #define TAG2(x,y) radeon##x##_ARGB8888##y
460 #if defined(RADEON_R600)
461 #define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
462 #define PUT_VALUE(_x, _y, d) { \
463 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
464 *_ptr = d; \
465 } while (0)
466 #else
467 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
468 #define PUT_VALUE(_x, _y, d) { \
469 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
470 *_ptr = d; \
471 } while (0)
472 #endif
473 #include "spantmp2.h"
474
475 /* ================================================================
476 * Depth buffer
477 */
478
479 /* The Radeon family has depth tiling on all the time, so we have to convert
480 * the x,y coordinates into the memory bus address (mba) in the same
481 * manner as the engine. In each case, the linear block address (ba)
482 * is calculated, and then wired with x and y to produce the final
483 * memory address.
484 * The chip will do address translation on its own if the surface registers
485 * are set up correctly. It is not quite enough to get it working with hyperz
486 * too...
487 */
488
489 /* 16-bit depth buffer functions
490 */
491 #define VALUE_TYPE GLushort
492
493 #if defined(RADEON_R200)
494 #define WRITE_DEPTH( _x, _y, d ) \
495 *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
496 #elif defined(RADEON_R600)
497 #define WRITE_DEPTH( _x, _y, d ) \
498 *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
499 #else
500 #define WRITE_DEPTH( _x, _y, d ) \
501 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
502 #endif
503
504 #if defined(RADEON_R200)
505 #define READ_DEPTH( d, _x, _y ) \
506 d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
507 #elif defined(RADEON_R600)
508 #define READ_DEPTH( d, _x, _y ) \
509 d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
510 #else
511 #define READ_DEPTH( d, _x, _y ) \
512 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
513 #endif
514
515 #define TAG(x) radeon##x##_z16
516 #include "depthtmp.h"
517
518 /* 24 bit depth
519 *
520 * Careful: It looks like the R300 uses ZZZS byte order while the R200
521 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
522 */
523 #define VALUE_TYPE GLuint
524
525 #if defined(RADEON_R300)
526 #define WRITE_DEPTH( _x, _y, d ) \
527 do { \
528 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
529 GLuint tmp = *_ptr; \
530 tmp &= 0x000000ff; \
531 tmp |= ((d << 8) & 0xffffff00); \
532 *_ptr = tmp; \
533 } while (0)
534 #elif defined(RADEON_R600)
535 #define WRITE_DEPTH( _x, _y, d ) \
536 do { \
537 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
538 GLuint tmp = *_ptr; \
539 tmp &= 0xff000000; \
540 tmp |= ((d) & 0x00ffffff); \
541 *_ptr = tmp; \
542 } while (0)
543 #elif defined(RADEON_R200)
544 #define WRITE_DEPTH( _x, _y, d ) \
545 do { \
546 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
547 GLuint tmp = *_ptr; \
548 tmp &= 0xff000000; \
549 tmp |= ((d) & 0x00ffffff); \
550 *_ptr = tmp; \
551 } while (0)
552 #else
553 #define WRITE_DEPTH( _x, _y, d ) \
554 do { \
555 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
556 GLuint tmp = *_ptr; \
557 tmp &= 0xff000000; \
558 tmp |= ((d) & 0x00ffffff); \
559 *_ptr = tmp; \
560 } while (0)
561 #endif
562
563 #if defined(RADEON_R300)
564 #define READ_DEPTH( d, _x, _y ) \
565 do { \
566 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
567 }while(0)
568 #elif defined(RADEON_R600)
569 #define READ_DEPTH( d, _x, _y ) \
570 do { \
571 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
572 }while(0)
573 #elif defined(RADEON_R200)
574 #define READ_DEPTH( d, _x, _y ) \
575 do { \
576 d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
577 }while(0)
578 #else
579 #define READ_DEPTH( d, _x, _y ) \
580 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
581 #endif
582
583 #define TAG(x) radeon##x##_z24
584 #include "depthtmp.h"
585
586 /* 24 bit depth, 8 bit stencil depthbuffer functions
587 * EXT_depth_stencil
588 *
589 * Careful: It looks like the R300 uses ZZZS byte order while the R200
590 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
591 */
592 #define VALUE_TYPE GLuint
593
594 #if defined(RADEON_R300)
595 #define WRITE_DEPTH( _x, _y, d ) \
596 do { \
597 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
598 *_ptr = d; \
599 } while (0)
600 #elif defined(RADEON_R600)
601 #define WRITE_DEPTH( _x, _y, d ) \
602 do { \
603 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
604 GLuint tmp = *_ptr; \
605 tmp &= 0xff000000; \
606 tmp |= (((d) >> 8) & 0x00ffffff); \
607 *_ptr = tmp; \
608 _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
609 tmp = *_ptr; \
610 tmp &= 0xffffff00; \
611 tmp |= (d) & 0xff; \
612 *_ptr = tmp; \
613 } while (0)
614 #elif defined(RADEON_R200)
615 #define WRITE_DEPTH( _x, _y, d ) \
616 do { \
617 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
618 *_ptr = d; \
619 } while (0)
620 #else
621 #define WRITE_DEPTH( _x, _y, d ) \
622 do { \
623 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
624 *_ptr = d; \
625 } while (0)
626 #endif
627
628 #if defined(RADEON_R300)
629 #define READ_DEPTH( d, _x, _y ) \
630 do { \
631 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
632 }while(0)
633 #elif defined(RADEON_R600)
634 #define READ_DEPTH( d, _x, _y ) \
635 do { \
636 d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
637 d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
638 }while(0)
639 #elif defined(RADEON_R200)
640 #define READ_DEPTH( d, _x, _y ) \
641 do { \
642 d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)); \
643 }while(0)
644 #else
645 #define READ_DEPTH( d, _x, _y ) do { \
646 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off )); \
647 } while (0)
648 #endif
649
650 #define TAG(x) radeon##x##_s8_z24
651 #include "depthtmp.h"
652
653 /* ================================================================
654 * Stencil buffer
655 */
656
657 /* 24 bit depth, 8 bit stencil depthbuffer functions
658 */
659 #ifdef RADEON_R300
660 #define WRITE_STENCIL( _x, _y, d ) \
661 do { \
662 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
663 GLuint tmp = *_ptr; \
664 tmp &= 0xffffff00; \
665 tmp |= (d) & 0xff; \
666 *_ptr = tmp; \
667 } while (0)
668 #elif defined(RADEON_R600)
669 #define WRITE_STENCIL( _x, _y, d ) \
670 do { \
671 GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
672 GLuint tmp = *_ptr; \
673 tmp &= 0xffffff00; \
674 tmp |= (d) & 0xff; \
675 *_ptr = tmp; \
676 } while (0)
677 #elif defined(RADEON_R200)
678 #define WRITE_STENCIL( _x, _y, d ) \
679 do { \
680 GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
681 GLuint tmp = *_ptr; \
682 tmp &= 0x00ffffff; \
683 tmp |= (((d) & 0xff) << 24); \
684 *_ptr = tmp; \
685 } while (0)
686 #else
687 #define WRITE_STENCIL( _x, _y, d ) \
688 do { \
689 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
690 GLuint tmp = *_ptr; \
691 tmp &= 0x00ffffff; \
692 tmp |= (((d) & 0xff) << 24); \
693 *_ptr = tmp; \
694 } while (0)
695 #endif
696
697 #ifdef RADEON_R300
698 #define READ_STENCIL( d, _x, _y ) \
699 do { \
700 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
701 GLuint tmp = *_ptr; \
702 d = tmp & 0x000000ff; \
703 } while (0)
704 #elif defined(RADEON_R600)
705 #define READ_STENCIL( d, _x, _y ) \
706 do { \
707 GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
708 GLuint tmp = *_ptr; \
709 d = tmp & 0x000000ff; \
710 } while (0)
711 #elif defined(RADEON_R200)
712 #define READ_STENCIL( d, _x, _y ) \
713 do { \
714 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
715 GLuint tmp = *_ptr; \
716 d = (tmp & 0xff000000) >> 24; \
717 } while (0)
718 #else
719 #define READ_STENCIL( d, _x, _y ) \
720 do { \
721 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
722 GLuint tmp = *_ptr; \
723 d = (tmp & 0xff000000) >> 24; \
724 } while (0)
725 #endif
726
727 #define TAG(x) radeon##x##_s8_z24
728 #include "stenciltmp.h"
729
730
731 static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
732 {
733 struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
734 int r;
735
736 if (rrb == NULL || !rrb->bo)
737 return;
738
739 if (flag) {
740 if (rrb->bo->bom->funcs->bo_wait)
741 radeon_bo_wait(rrb->bo);
742 r = radeon_bo_map(rrb->bo, 1);
743 if (r) {
744 fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
745 __FUNCTION__, r);
746 }
747
748 radeonSetSpanFunctions(rrb);
749 } else {
750 radeon_bo_unmap(rrb->bo);
751 rb->GetRow = NULL;
752 rb->PutRow = NULL;
753 }
754 }
755
756 static void
757 radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
758 {
759 GLuint i, j;
760
761 /* color draw buffers */
762 for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
763 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
764
765 /* check for render to textures */
766 for (i = 0; i < BUFFER_COUNT; i++) {
767 struct gl_renderbuffer_attachment *att =
768 ctx->DrawBuffer->Attachment + i;
769 struct gl_texture_object *tex = att->Texture;
770 if (tex) {
771 /* Render to texture. Note that a mipmapped texture need not
772 * be complete for render to texture, so we must restrict to
773 * mapping only the attached image.
774 */
775 radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
776 ASSERT(att->Renderbuffer);
777
778 if (map)
779 radeon_teximage_map(image, GL_TRUE);
780 else
781 radeon_teximage_unmap(image);
782 }
783 }
784
785 map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
786
787 /* depth buffer (Note wrapper!) */
788 if (ctx->DrawBuffer->_DepthBuffer)
789 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
790
791 if (ctx->DrawBuffer->_StencilBuffer)
792 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
793 }
794
795 static void radeonSpanRenderStart(GLcontext * ctx)
796 {
797 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
798 int i;
799
800 radeon_firevertices(rmesa);
801
802 /* The locking and wait for idle should really only be needed in classic mode.
803 * In a future memory manager based implementation, this should become
804 * unnecessary due to the fact that mapping our buffers, textures, etc.
805 * should implicitly wait for any previous rendering commands that must
806 * be waited on. */
807 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
808 LOCK_HARDWARE(rmesa);
809 radeonWaitForIdleLocked(rmesa);
810 }
811
812 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
813 if (ctx->Texture.Unit[i]._ReallyEnabled)
814 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
815 }
816
817 radeon_map_unmap_buffers(ctx, 1);
818 }
819
820 static void radeonSpanRenderFinish(GLcontext * ctx)
821 {
822 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
823 int i;
824 _swrast_flush(ctx);
825 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
826 UNLOCK_HARDWARE(rmesa);
827 }
828 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
829 if (ctx->Texture.Unit[i]._ReallyEnabled)
830 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
831 }
832
833 radeon_map_unmap_buffers(ctx, 0);
834 }
835
836 void radeonInitSpanFuncs(GLcontext * ctx)
837 {
838 struct swrast_device_driver *swdd =
839 _swrast_GetDeviceDriverReference(ctx);
840 swdd->SpanRenderStart = radeonSpanRenderStart;
841 swdd->SpanRenderFinish = radeonSpanRenderFinish;
842 }
843
844 /**
845 * Plug in the Get/Put routines for the given driRenderbuffer.
846 */
847 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
848 {
849 if (rrb->base.Format == MESA_FORMAT_RGB565) {
850 radeonInitPointers_RGB565(&rrb->base);
851 } else if (rrb->base.Format == MESA_FORMAT_RGBA8888) { /* XXX */
852 radeonInitPointers_xRGB8888(&rrb->base);
853 } else if (rrb->base.Format == MESA_FORMAT_RGBA8888) {
854 radeonInitPointers_ARGB8888(&rrb->base);
855 } else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
856 radeonInitPointers_ARGB4444(&rrb->base);
857 } else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
858 radeonInitPointers_ARGB1555(&rrb->base);
859 } else if (rrb->base.Format == MESA_FORMAT_Z16) {
860 radeonInitDepthPointers_z16(&rrb->base);
861 } else if (rrb->base.Format == GL_DEPTH_COMPONENT32) { /* XXX */
862 radeonInitDepthPointers_z24(&rrb->base);
863 } else if (rrb->base.Format == MESA_FORMAT_S8_Z24) {
864 radeonInitDepthPointers_s8_z24(&rrb->base);
865 } else if (rrb->base.Format == MESA_FORMAT_S8) {
866 radeonInitStencilPointers_s8_z24(&rrb->base);
867 } else {
868 fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
869 }
870 }