r600: minor span cleanups
src/mesa/drivers/dri/radeon/radeon_span.c
/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);

/* The r200 depth buffer is always tiled - this is the formula from the
 * docs, unless a typo crept in while transcribing it.
 */
#if defined(RADEON_COMMON_FOR_R200)
static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
                                 GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    GLint offset;
    if (rrb->has_surface) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        GLuint b;
        offset = 0;
        b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6)));
        offset += (b >> 1) << 12;
        offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
        offset += ((y >> 2) & 0x3) << 9;
        offset += ((x >> 3) & 0x1) << 8;
        offset += ((x >> 4) & 0x3) << 6;
        offset += ((x >> 2) & 0x1) << 5;
        offset += ((y >> 1) & 0x1) << 4;
        offset += ((x >> 1) & 0x1) << 3;
        offset += (y & 0x1) << 2;
        offset += (x & 0x1) << 1;
    }
    return &ptr[offset];
}

static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
                                 GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    GLint offset;
    if (rrb->has_surface) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        GLuint b;
        offset = 0;
        b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
        offset += (b >> 1) << 12;
        offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
        offset += ((y >> 2) & 0x3) << 9;
        offset += ((x >> 2) & 0x1) << 8;
        offset += ((x >> 3) & 0x3) << 6;
        offset += ((y >> 1) & 0x1) << 5;
        offset += ((x >> 1) & 0x1) << 4;
        offset += (y & 0x1) << 3;
        offset += (x & 0x1) << 2;
    }
    return &ptr[offset];
}
#endif
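
/* Illustrative note (not from the hardware docs): in the linear
 * (has_surface) path above, the byte offset is simply x * cpp + y * pitch.
 * For example, assuming a 4-byte depth buffer with a pitch of 2048 bytes,
 * the pixel at (x, y) = (10, 3) would live at
 *     10 * 4 + 3 * 2048 = 6184
 * bytes from the start of the buffer.  The tiled branches compute the same
 * kind of byte offset, but with the bits of x and y swizzled to match the
 * hardware's tiled layout.
 */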

/* r600 tiling
 * two main types:
 * - 1D (akin to macro-linear/micro-tiled on older asics)
 * - 2D (akin to macro-tiled/micro-tiled on older asics)
 * only 1D tiling is implemented below
 */
#if defined(RADEON_COMMON_FOR_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
                                        GLint x, GLint y, GLint is_depth, GLint is_stencil)
{
    GLint element_bytes = rrb->cpp;
    GLint num_samples = 1;
    GLint tile_width = 8;
    GLint tile_height = 8;
    GLint tile_thickness = 1;
    GLint pitch_elements = rrb->pitch / element_bytes;
    GLint height = rrb->base.Height;
    GLint z = 0;
    GLint sample_number = 0;
    /* */
    GLint tile_bytes;
    GLint tiles_per_row;
    GLint tiles_per_slice;
    GLint slice_offset;
    GLint tile_row_index;
    GLint tile_column_index;
    GLint tile_offset;
    GLint pixel_number = 0;
    GLint element_offset;
    GLint offset = 0;

    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
    tiles_per_row = pitch_elements / tile_width;
    tiles_per_slice = tiles_per_row * (height / tile_height);
    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
    tile_row_index = y / tile_height;
    tile_column_index = x / tile_width;
    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;

    if (is_depth) {
        GLint pixel_offset = 0;

        pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
        pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
        pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
        pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
        pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
        pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
        switch (element_bytes) {
        case 2:
            pixel_offset = pixel_number * element_bytes * num_samples;
            break;
        case 4:
            /* stencil and depth data are stored separately within a tile.
             * stencil is stored in a contiguous tile before the depth tile.
             * stencil element is 1 byte, depth element is 3 bytes.
             * stencil tile is 64 bytes.
             */
            if (is_stencil)
                pixel_offset = pixel_number * 1 * num_samples;
            else
                pixel_offset = (pixel_number * 3 * num_samples) + 64;
            break;
        }
        element_offset = pixel_offset + (sample_number * element_bytes);
    } else {
        GLint sample_offset;

        switch (element_bytes) {
        case 1:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
            pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
            pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        case 2:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
            pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        case 4:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
            pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        }
        sample_offset = sample_number * (tile_bytes / num_samples);
        element_offset = sample_offset + (pixel_number * element_bytes);
    }
    offset = slice_offset + tile_offset + element_offset;
    return offset;
}
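
/* Worked example for the helper above (illustrative only, with assumed
 * numbers): a 32bpp color buffer with a pitch of 1024 bytes gives
 * pitch_elements = 256, tiles_per_row = 32 and
 * tile_bytes = 8 * 8 * 1 * 4 * 1 = 256.  For the pixel at (x, y) = (13, 9):
 *     tile_row_index = 1, tile_column_index = 1
 *     tile_offset    = (1 * 32 + 1) * 256 = 8448
 *     pixel_number   = x[0] | x[1]<<1 | y[0]<<2 | x[2]<<3 | y[1]<<4 | y[2]<<5
 *                    = 1 + 4 + 8 = 13
 *     element_offset = 13 * 4 = 52
 * so the element sits at byte offset 8448 + 52 = 8500 into the buffer.
 */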

/* depth buffers */
static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
                               GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
    return &ptr[offset];
}

static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
                                 GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
    return &ptr[offset];
}

static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
                               GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
    }
    return &ptr[offset];
}

#endif

/* Radeon tiling on r300-r500 has four states:
 *   macro-linear/micro-linear
 *   macro-linear/micro-tiled
 *   macro-tiled /micro-linear
 *   macro-tiled /micro-tiled
 * and four surface element sizes:
 *   1 byte
 *   2 byte - two micro-tile layouts exist; only 8x2 micro-tiling is provided here
 *   4 byte
 *   8/16 byte (unused)
 */
static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
                                 GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
                offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
                offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
                offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
                offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
                offset += ((y >> 1) & 0x1) << 6;
                offset += ((x >> 2) & 0x1) << 5;
                offset += (y & 1) << 4;
                offset += (x & 3) << 2;
            } else {
                offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
                offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
                offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
                offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
                offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
                offset += (y & 1) << 6;
                offset += (x & 15) << 2;
            }
        } else {
            offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
            offset += (y & 1) << 4;
            offset += (x & 3) << 2;
        }
    }
    return &ptr[offset];
}
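
/* Worked example for the micro-tiled (non-macro) branch above (illustrative
 * only, with assumed numbers): for a 4-byte surface with a pitch of
 * 1024 bytes and (x, y) = (5, 3),
 *     offset  = ((3 >> 1) * (1024 >> 4) + (5 >> 2)) << 5 = (1 * 64 + 1) * 32 = 2080
 *     offset += (3 & 1) << 4 = 16
 *     offset += (5 & 3) << 2 = 4
 * giving 2100, i.e. the pixel lands inside the 32-byte (4x2 pixel)
 * micro-tile that starts at byte 2080.
 */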

static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
                                     GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
                offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
                offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
                offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
                offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
                offset += ((y >> 1) & 0x1) << 6;
                offset += ((x >> 3) & 0x1) << 5;
                offset += (y & 1) << 4;
                offset += (x & 3) << 2;
            } else {
                offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
                offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
                offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
                offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
                offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
                offset += (y & 1) << 6;
                offset += ((x >> 4) & 0x1) << 5;
                offset += (x & 15) << 2;
            }
        } else {
            offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
            offset += (y & 0x1) << 4;
            offset += (x & 0x7) << 1;
        }
    }
    return &ptr[offset];
}
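
/* Similarly for the 8x2 micro-tiled 2-byte layout above (illustrative only,
 * with assumed numbers): with a pitch of 512 bytes and (x, y) = (9, 4),
 *     offset  = ((4 >> 1) * (512 >> 4) + (9 >> 3)) << 5 = (2 * 32 + 1) * 32 = 2080
 *     offset += (4 & 1) << 4 = 0
 *     offset += (9 & 7) << 1 = 2
 * giving 2082, inside the 32-byte (8x2 pixel) micro-tile at byte 2080.
 */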

#ifndef COMPILE_R300
static uint32_t
z24s8_to_s8z24(uint32_t val)
{
    return (val << 24) | (val >> 8);
}

static uint32_t
s8z24_to_z24s8(uint32_t val)
{
    return (val >> 24) | (val << 8);
}
#endif
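
/* Example of the two rotations above (illustrative only): a packed z24s8
 * value with depth 0x123456 and stencil 0xff converts as
 *     z24s8_to_s8z24(0x123456ff) == 0xff123456
 *     s8z24_to_z24s8(0xff123456) == 0x123456ff
 * i.e. the two helpers are exact inverses of each other.
 */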

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
#define LOCAL_VARS \
    struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
    struct radeon_renderbuffer *rrb = (void *) rb; \
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
    unsigned int num_cliprects; \
    struct drm_clip_rect *cliprects; \
    int x_off, y_off; \
    GLuint p; \
    (void)p; \
    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_DEPTH_VARS \
    struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
    struct radeon_renderbuffer *rrb = (void *) rb; \
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
    unsigned int num_cliprects; \
    struct drm_clip_rect *cliprects; \
    int x_off, y_off; \
    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

#define Y_FLIP(_y) ((_y) * yScale + yBias)
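
/* Y_FLIP maps a window-relative y to buffer coordinates.  With the variables
 * set up by LOCAL_VARS above: for a user FBO (DrawBuffer->Name != 0),
 * yScale = 1 and yBias = 0, so y passes through unchanged; for the
 * window-system framebuffer, yScale = -1 and yBias = Height - 1, so e.g.
 * Y_FLIP(0) == Height - 1, converting GL's bottom-up convention to the
 * top-down layout of the screen buffer.
 */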

#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 */
#define HW_CLIPLOOP() \
    do { \
        int _nc = num_cliprects; \
        while ( _nc-- ) { \
            int minx = cliprects[_nc].x1 - x_off; \
            int miny = cliprects[_nc].y1 - y_off; \
            int maxx = cliprects[_nc].x2 - x_off; \
            int maxy = cliprects[_nc].y2 - y_off;

/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#if defined(RADEON_COMMON_FOR_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 16 bit, ARGB1555 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV

#define TAG(x) radeon##x##_ARGB1555
#define TAG2(x,y) radeon##x##_ARGB1555##y
#if defined(RADEON_COMMON_FOR_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 16 bit, RGBA4 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV

#define TAG(x) radeon##x##_ARGB4444
#define TAG2(x,y) radeon##x##_ARGB4444##y
#if defined(RADEON_COMMON_FOR_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 32 bit, xRGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x) radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
#if defined(RADEON_COMMON_FOR_R600)
#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
#define PUT_VALUE(_x, _y, d) do { \
    GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#else
#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
#define PUT_VALUE(_x, _y, d) do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#endif
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#if defined(RADEON_COMMON_FOR_R600)
#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
#define PUT_VALUE(_x, _y, d) do { \
    GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#else
#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
#define PUT_VALUE(_x, _y, d) do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#endif
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling enabled at all times, so we have to
 * convert the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine. In each case, the linear block address (ba) is
 * calculated, and then combined with bits of x and y to produce the final
 * memory address.
 * The chip will do the address translation on its own if the surface
 * registers are set up correctly. It is not quite enough to get it working
 * with hyperz too...
 */

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#if defined(RADEON_COMMON_FOR_R200)
#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
#elif defined(RADEON_COMMON_FOR_R600)
#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
#else
#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
#endif

#if defined(RADEON_COMMON_FOR_R200)
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
#elif defined(RADEON_COMMON_FOR_R600)
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
#else
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
#endif

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
#define VALUE_TYPE GLuint

#if defined(COMPILE_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0x000000ff; \
    tmp |= ((d << 8) & 0xffffff00); \
    *_ptr = tmp; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = tmp; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = tmp; \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = tmp; \
} while (0)
#endif

#if defined(COMPILE_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
} while (0)
#elif defined(RADEON_COMMON_FOR_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
} while (0)
#else
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
#endif

#define TAG(x) radeon##x##_z24
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 * EXT_depth_stencil
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
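
/* Bit layout used by the macros below: on R300 the packed GL z24s8 value is
 * stored as-is (depth in bits 31:8, stencil in bits 7:0); R200 and the
 * default path store it rotated (stencil in bits 31:24, depth in bits 23:0),
 * hence the z24s8_to_s8z24/s8z24_to_z24s8 conversions; R600 keeps depth and
 * stencil in separate locations via r600_ptr_depth/r600_ptr_stencil.
 */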
#define VALUE_TYPE GLuint

#if defined(COMPILE_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0xff000000; \
    tmp |= (((d) >> 8) & 0x00ffffff); \
    *_ptr = tmp; \
    _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
    tmp = *_ptr; \
    tmp &= 0xffffff00; \
    tmp |= (d) & 0xff; \
    *_ptr = tmp; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = z24s8_to_s8z24(d); \
    *_ptr = tmp; \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = z24s8_to_s8z24(d); \
    *_ptr = tmp; \
} while (0)
#endif

#if defined(COMPILE_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
} while (0)
#elif defined(RADEON_COMMON_FOR_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
    d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
} while (0)
#else
#define READ_DEPTH( d, _x, _y ) do { \
    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = *_ptr; \
    tmp &= 0xffffff00; \
    tmp |= (d) & 0xff; \
    *_ptr = tmp; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R600)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = *_ptr; \
    tmp &= 0xffffff00; \
    tmp |= (d) & 0xff; \
    *_ptr = tmp; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R200)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = *_ptr; \
    tmp &= 0x00ffffff; \
    tmp |= (((d) & 0xff) << 24); \
    *_ptr = tmp; \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = *_ptr; \
    tmp &= 0x00ffffff; \
    tmp |= (((d) & 0xff) << 24); \
    *_ptr = tmp; \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    d = tmp & 0x000000ff; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R600)
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    d = tmp & 0x000000ff; \
} while (0)
#elif defined(RADEON_COMMON_FOR_R200)
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    d = (tmp & 0xff000000) >> 24; \
} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"


static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
{
    struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
    int r;

    if (rrb == NULL || !rrb->bo)
        return;

    if (flag) {
        if (rrb->bo->bom->funcs->bo_wait)
            radeon_bo_wait(rrb->bo);
        r = radeon_bo_map(rrb->bo, 1);
        if (r) {
            fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
                    __FUNCTION__, r);
        }

        radeonSetSpanFunctions(rrb);
    } else {
        radeon_bo_unmap(rrb->bo);
        rb->GetRow = NULL;
        rb->PutRow = NULL;
    }
}

static void
radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
{
    GLuint i, j;

    /* color draw buffers */
    for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
        map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);

    /* check for render to textures */
    for (i = 0; i < BUFFER_COUNT; i++) {
        struct gl_renderbuffer_attachment *att =
            ctx->DrawBuffer->Attachment + i;
        struct gl_texture_object *tex = att->Texture;
        if (tex) {
            /* Render to texture. Note that a mipmapped texture need not
             * be complete for render to texture, so we must restrict to
             * mapping only the attached image.
             */
            radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
            ASSERT(att->Renderbuffer);

            if (map)
                radeon_teximage_map(image, GL_TRUE);
            else
                radeon_teximage_unmap(image);
        }
    }

    map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);

    /* depth buffer (Note wrapper!) */
    if (ctx->DrawBuffer->_DepthBuffer)
        map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);

    if (ctx->DrawBuffer->_StencilBuffer)
        map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
}

static void radeonSpanRenderStart(GLcontext * ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    radeon_firevertices(rmesa);

    /* The locking and wait for idle should really only be needed in classic mode.
     * In a future memory manager based implementation, this should become
     * unnecessary due to the fact that mapping our buffers, textures, etc.
     * should implicitly wait for any previous rendering commands that must
     * be waited on. */
    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        LOCK_HARDWARE(rmesa);
        radeonWaitForIdleLocked(rmesa);
    }

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    radeon_map_unmap_buffers(ctx, 1);
}

static void radeonSpanRenderFinish(GLcontext * ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;
    _swrast_flush(ctx);
    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        UNLOCK_HARDWARE(rmesa);
    }
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    radeon_map_unmap_buffers(ctx, 0);
}

void radeonInitSpanFuncs(GLcontext * ctx)
{
    struct swrast_device_driver *swdd =
        _swrast_GetDeviceDriverReference(ctx);
    swdd->SpanRenderStart = radeonSpanRenderStart;
    swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put routines for the given renderbuffer.
 */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
    if (rrb->base._ActualFormat == GL_RGB5) {
        radeonInitPointers_RGB565(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_RGB8) {
        radeonInitPointers_xRGB8888(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_RGBA8) {
        radeonInitPointers_ARGB8888(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_RGBA4) {
        radeonInitPointers_ARGB4444(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
        radeonInitPointers_ARGB1555(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
        radeonInitDepthPointers_z16(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
        radeonInitDepthPointers_z24(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
        radeonInitDepthPointers_z24_s8(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
        radeonInitStencilPointers_z24_s8(&rrb->base);
    } else {
        fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n",
                rrb->base._ActualFormat);
    }
}