src/mesa/drivers/dri/radeon/radeon_span.c

   1 /**************************************************************************
   2
   3 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
   4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   5                      VA Linux Systems Inc., Fremont, California.
   6
   7 The Weather Channel (TM) funded Tungsten Graphics to develop the
   8 initial release of the Radeon 8500 driver under the XFree86 license.
   9 This notice must be preserved.
  10
  11 All Rights Reserved.
  12
  13 Permission is hereby granted, free of charge, to any person obtaining
  14 a copy of this software and associated documentation files (the
  15 "Software"), to deal in the Software without restriction, including
  16 without limitation the rights to use, copy, modify, merge, publish,
  17 distribute, sublicense, and/or sell copies of the Software, and to
  18 permit persons to whom the Software is furnished to do so, subject to
  19 the following conditions:
  20
  21 The above copyright notice and this permission notice (including the
  22 next paragraph) shall be included in all copies or substantial
  23 portions of the Software.
  24
  25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  32
  33 **************************************************************************/
  34
  35 /*
  36  * Authors:
  37  *   Kevin E. Martin <martin@valinux.com>
  38  *   Gareth Hughes <gareth@valinux.com>
  39  *   Keith Whitwell <keith@tungstengraphics.com>
  40  *
  41  */
  42
  43 #include "main/glheader.h"
  44 #include "swrast/swrast.h"
  45
  46 #include "radeon_common.h"
  47 #include "radeon_lock.h"
  48 #include "radeon_span.h"
  49
  50 #define DBG 0
  51
  52 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
  53
  54
  55 /* r200 depth buffer is always tiled - this is the formula
  56    according to the docs unless I typo'ed in it
  57 */
  58 static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
  59                                  GLint x, GLint y)
  60 {
  61     GLubyte *ptr = rrb->bo->ptr;
  62     GLint offset;
  63     if (rrb->has_surface) {
  64         offset = x * rrb->cpp + y * rrb->pitch;
  65     } else {
  66         GLuint b;
  67         offset = 0;
  68         b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
  69         offset += (b >> 1) << 12;
  70         offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
  71         offset += ((y >> 2) & 0x3) << 9;
  72         offset += ((x >> 3) & 0x1) << 8;
  73         offset += ((x >> 4) & 0x3) << 6;
  74         offset += ((x >> 2) & 0x1) << 5;
  75         offset += ((y >> 1) & 0x1) << 4;
  76         offset += ((x >> 1) & 0x1) << 3;
  77         offset += (y & 0x1) << 2;
  78         offset += (x & 0x1) << 1;
  79     }
  80     return &ptr[offset];
  81 }
  82
  83 static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
  84                                  GLint x, GLint y)
  85 {
  86     GLubyte *ptr = rrb->bo->ptr;
  87     GLint offset;
  88     if (rrb->has_surface) {
  89         offset = x * rrb->cpp + y * rrb->pitch;
  90     } else {
  91         GLuint b;
  92         offset = 0;
  93         b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
  94         offset += (b >> 1) << 12;
  95         offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
  96         offset += ((y >> 2) & 0x3) << 9;
  97         offset += ((x >> 2) & 0x1) << 8;
  98         offset += ((x >> 3) & 0x3) << 6;
  99         offset += ((y >> 1) & 0x1) << 5;
 100         offset += ((x >> 1) & 0x1) << 4;
 101         offset += (y & 0x1) << 3;
 102         offset += (x & 0x1) << 2;
 103     }
 104     return &ptr[offset];
 105 }
 106
 107 /* radeon tiling on r300-r500 has 4 states,
 108    macro-linear/micro-linear
 109    macro-linear/micro-tiled
 110    macro-tiled /micro-linear
 111    macro-tiled /micro-tiled
 112    1 byte surface
 113    2 byte surface - two types - we only provide 8x2 microtiling
 114    4 byte surface
 115    8/16 byte (unused)
 116 */
 117 static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
 118                              GLint x, GLint y)
 119 {
 120     GLubyte *ptr = rrb->bo->ptr;
 121     uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
 122     GLint offset;
 123
 124     if (rrb->has_surface || !(rrb->bo->flags & mask)) {
 125         offset = x * rrb->cpp + y * rrb->pitch;
 126     } else {
 127         offset = 0;
 128         if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
 129             if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
 130                 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
 131                 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
 132                 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
 133                 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
 134                 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
 135                 offset += ((y >> 1) & 0x1) << 6;
 136                 offset += ((x >> 2) & 0x1) << 5;
 137                 offset += (y & 1) << 4;
 138                 offset += (x & 3) << 2;
 139             } else {
 140                 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
 141                 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
 142                 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
 143                 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
 144                 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
 145                 offset += (y & 1) << 6;
 146                 offset += (x & 15) << 2;
 147             }
 148         } else {
 149             offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
 150             offset += (y & 1) << 4;
 151             offset += (x & 3) << 2;
 152         }
 153     }
 154     return &ptr[offset];
 155 }
 156
 157 static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
 158                                      GLint x, GLint y)
 159 {
 160     GLubyte *ptr = rrb->bo->ptr;
 161     uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
 162     GLint offset;
 163
 164     if (rrb->has_surface || !(rrb->bo->flags & mask)) {
 165         offset = x * rrb->cpp + y * rrb->pitch;
 166     } else {
 167         offset = 0;
 168         if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
 169             if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
 170                 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
 171                 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
 172                 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
 173                 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
 174                 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
 175                 offset += ((y >> 1) & 0x1) << 6;
 176                 offset += ((x >> 3) & 0x1) << 5;
 177                 offset += (y & 1) << 4;
 178                 offset += (x & 3) << 2;
 179             } else {
 180                 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
 181                 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
 182                 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
 183                 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
 184                 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
 185                 offset += (y & 1) << 6;
 186                 offset += ((x >> 4) & 0x1) << 5;
 187                 offset += (x & 15) << 2;
 188             }
 189         } else {
 190             offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
 191             offset += (y & 0x1) << 4;
 192             offset += (x & 0x7) << 1;
 193         }
 194     }
 195     return &ptr[offset];
 196 }
 197
 198 #ifndef COMPILE_R300
 199 static uint32_t
 200 z24s8_to_s8z24(uint32_t val)
 201 {
 202    return (val << 24) | (val >> 8);
 203 }
 204
 205 static uint32_t
 206 s8z24_to_z24s8(uint32_t val)
 207 {
 208    return (val >> 24) | (val << 8);
 209 }
 210 #endif
 211
 212 /*
 213  * Note that all information needed to access pixels in a renderbuffer
 214  * should be obtained through the gl_renderbuffer parameter, not per-context
 215  * information.
 216  */
 217 #define LOCAL_VARS                                              \
 218    struct radeon_context *radeon = RADEON_CONTEXT(ctx);                 \
 219    struct radeon_renderbuffer *rrb = (void *) rb;               \
 220    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;                 \
 221    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
 222    unsigned int num_cliprects;                                          \
 223    struct drm_clip_rect *cliprects;                                     \
 224    int x_off, y_off;                                                    \
 225    GLuint p;                                            \
 226    (void)p;                                             \
 227    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
 228
 229 #define LOCAL_DEPTH_VARS                                \
 230    struct radeon_context *radeon = RADEON_CONTEXT(ctx);                 \
 231    struct radeon_renderbuffer *rrb = (void *) rb;       \
 232    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;                 \
 233    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
 234    unsigned int num_cliprects;                                          \
 235    struct drm_clip_rect *cliprects;                                     \
 236    int x_off, y_off;                                                    \
 237   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
 238
 239 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 240
 241 #define Y_FLIP(_y) ((_y) * yScale + yBias)
 242
 243 #define HW_LOCK()
 244
 245 #define HW_UNLOCK()
 246
 247 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
 248  * the cliprect info from the context, not the driDrawable.
 249  * Move this into spantmp2.h someday.
 250  */
 251 #define HW_CLIPLOOP()                                                   \
 252    do {                                                                 \
 253       int _nc = num_cliprects;                                          \
 254       while ( _nc-- ) {                                                 \
 255          int minx = cliprects[_nc].x1 - x_off;                          \
 256          int miny = cliprects[_nc].y1 - y_off;                          \
 257          int maxx = cliprects[_nc].x2 - x_off;                          \
 258          int maxy = cliprects[_nc].y2 - y_off;
 259
 260 /* ================================================================
 261  * Color buffer
 262  */
 263
 264 /* 16 bit, RGB565 color spanline and pixel functions
 265  */
 266 #define SPANTMP_PIXEL_FMT GL_RGB
 267 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
 268
 269 #define TAG(x)    radeon##x##_RGB565
 270 #define TAG2(x,y) radeon##x##_RGB565##y
 271 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
 272 #include "spantmp2.h"
 273
 274 /* 16 bit, ARGB1555 color spanline and pixel functions
 275  */
 276 #define SPANTMP_PIXEL_FMT GL_BGRA
 277 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
 278
 279 #define TAG(x)    radeon##x##_ARGB1555
 280 #define TAG2(x,y) radeon##x##_ARGB1555##y
 281 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
 282 #include "spantmp2.h"
 283
 284 /* 16 bit, RGBA4 color spanline and pixel functions
 285  */
 286 #define SPANTMP_PIXEL_FMT GL_BGRA
 287 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
 288
 289 #define TAG(x)    radeon##x##_ARGB4444
 290 #define TAG2(x,y) radeon##x##_ARGB4444##y
 291 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
 292 #include "spantmp2.h"
 293
 294 /* 32 bit, xRGB8888 color spanline and pixel functions
 295  */
 296 #define SPANTMP_PIXEL_FMT GL_BGRA
 297 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
 298
 299 #define TAG(x)    radeon##x##_xRGB8888
 300 #define TAG2(x,y) radeon##x##_xRGB8888##y
 301 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
 302 #define PUT_VALUE(_x, _y, d) { \
 303    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 304    *_ptr = d;                                                           \
 305 } while (0)
 306 #include "spantmp2.h"
 307
 308 /* 32 bit, ARGB8888 color spanline and pixel functions
 309  */
 310 #define SPANTMP_PIXEL_FMT GL_BGRA
 311 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
 312
 313 #define TAG(x)    radeon##x##_ARGB8888
 314 #define TAG2(x,y) radeon##x##_ARGB8888##y
 315 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
 316 #define PUT_VALUE(_x, _y, d) { \
 317    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 318    *_ptr = d;                                                           \
 319 } while (0)
 320 #include "spantmp2.h"
 321
 322 /* ================================================================
 323  * Depth buffer
 324  */
 325
 326 /* The Radeon family has depth tiling on all the time, so we have to convert
 327  * the x,y coordinates into the memory bus address (mba) in the same
 328  * manner as the engine.  In each case, the linear block address (ba)
 329  * is calculated, and then wired with x and y to produce the final
 330  * memory address.
 331  * The chip will do address translation on its own if the surface registers
 332  * are set up correctly. It is not quite enough to get it working with hyperz
 333  * too...
 334  */
 335
 336 /* 16-bit depth buffer functions
 337  */
 338 #define VALUE_TYPE GLushort
 339
 340 #if defined(RADEON_COMMON_FOR_R200)
 341 #define WRITE_DEPTH( _x, _y, d )                                        \
 342    *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
 343 #else
 344 #define WRITE_DEPTH( _x, _y, d )                                        \
 345    *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
 346 #endif
 347
 348 #if defined(RADEON_COMMON_FOR_R200)
 349 #define READ_DEPTH( d, _x, _y )                                         \
 350    d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
 351 #else
 352 #define READ_DEPTH( d, _x, _y )                                         \
 353    d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
 354 #endif
 355
 356 #define TAG(x) radeon##x##_z16
 357 #include "depthtmp.h"
 358
 359 /* 24 bit depth
 360  *
 361  * Careful: It looks like the R300 uses ZZZS byte order while the R200
 362  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 363  */
 364 #define VALUE_TYPE GLuint
 365
 366 #if defined(COMPILE_R300)
 367 #define WRITE_DEPTH( _x, _y, d )                                        \
 368 do {                                                                    \
 369    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 370    GLuint tmp = *_ptr;                          \
 371    tmp &= 0x000000ff;                                                   \
 372    tmp |= ((d << 8) & 0xffffff00);                                      \
 373    *_ptr = tmp;                                 \
 374 } while (0)
 375 #elif defined(RADEON_COMMON_FOR_R200)
 376 #define WRITE_DEPTH( _x, _y, d )                                        \
 377 do {                                                                    \
 378    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );             \
 379    GLuint tmp = *_ptr;                          \
 380    tmp &= 0xff000000;                                                   \
 381    tmp |= ((d) & 0x00ffffff);                                           \
 382    *_ptr = tmp;                                 \
 383 } while (0)
 384 #else
 385 #define WRITE_DEPTH( _x, _y, d )                                        \
 386 do {                                                                    \
 387    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );     \
 388    GLuint tmp = *_ptr;                                                  \
 389    tmp &= 0xff000000;                                                   \
 390    tmp |= ((d) & 0x00ffffff);                                           \
 391    *_ptr = tmp;                                 \
 392 } while (0)
 393 #endif
 394
 395 #if defined(COMPILE_R300)
 396 #define READ_DEPTH( d, _x, _y )                                         \
 397   do {                                                                  \
 398     d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
 399   }while(0)
 400 #elif defined(RADEON_COMMON_FOR_R200)
 401 #define READ_DEPTH( d, _x, _y )                                         \
 402   do {                                                                  \
 403     d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
 404   }while(0)
 405 #else
 406 #define READ_DEPTH( d, _x, _y ) \
 407   d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,      _y + y_off)) & 0x00ffffff;
 408 #endif
 409
 410 #define TAG(x) radeon##x##_z24
 411 #include "depthtmp.h"
 412
 413 /* 24 bit depth, 8 bit stencil depthbuffer functions
 414  * EXT_depth_stencil
 415  *
 416  * Careful: It looks like the R300 uses ZZZS byte order while the R200
 417  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 418  */
 419 #define VALUE_TYPE GLuint
 420
 421 #if defined(COMPILE_R300)
 422 #define WRITE_DEPTH( _x, _y, d )                                        \
 423 do {                                                                    \
 424    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 425    *_ptr = d;                                                           \
 426 } while (0)
 427 #elif defined(RADEON_COMMON_FOR_R200)
 428 #define WRITE_DEPTH( _x, _y, d )                                        \
 429 do {                                                                    \
 430    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );             \
 431    GLuint tmp = z24s8_to_s8z24(d);                                      \
 432    *_ptr = tmp;                                                         \
 433 } while (0)
 434 #else
 435 #define WRITE_DEPTH( _x, _y, d )                                        \
 436 do {                                                                    \
 437    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );     \
 438    GLuint tmp = z24s8_to_s8z24(d);                                      \
 439    *_ptr = tmp;                                 \
 440 } while (0)
 441 #endif
 442
 443 #if defined(COMPILE_R300)
 444 #define READ_DEPTH( d, _x, _y )                                         \
 445   do { \
 446     d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));    \
 447   }while(0)
 448 #elif defined(RADEON_COMMON_FOR_R200)
 449 #define READ_DEPTH( d, _x, _y )                                         \
 450   do { \
 451     d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)));      \
 452   }while(0)
 453 #else
 454 #define READ_DEPTH( d, _x, _y ) do {                                    \
 455     d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,     _y + y_off ))); \
 456   } while (0)
 457 #endif
 458
 459 #define TAG(x) radeon##x##_z24_s8
 460 #include "depthtmp.h"
 461
 462 /* ================================================================
 463  * Stencil buffer
 464  */
 465
 466 /* 24 bit depth, 8 bit stencil depthbuffer functions
 467  */
 468 #ifdef COMPILE_R300
 469 #define WRITE_STENCIL( _x, _y, d )                                      \
 470 do {                                                                    \
 471    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);               \
 472    GLuint tmp = *_ptr;                          \
 473    tmp &= 0xffffff00;                                                   \
 474    tmp |= (d) & 0xff;                                                   \
 475    *_ptr = tmp;                                 \
 476 } while (0)
 477 #elif defined(RADEON_COMMON_FOR_R200)
 478 #define WRITE_STENCIL( _x, _y, d )                                      \
 479 do {                                                                    \
 480    GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);               \
 481    GLuint tmp = *_ptr;                          \
 482    tmp &= 0x00ffffff;                                                   \
 483    tmp |= (((d) & 0xff) << 24);                                         \
 484    *_ptr = tmp;                                 \
 485 } while (0)
 486 #else
 487 #define WRITE_STENCIL( _x, _y, d )                                      \
 488 do {                                                                    \
 489    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);               \
 490    GLuint tmp = *_ptr;                          \
 491    tmp &= 0x00ffffff;                                                   \
 492    tmp |= (((d) & 0xff) << 24);                                         \
 493    *_ptr = tmp;                                 \
 494 } while (0)
 495 #endif
 496
 497 #ifdef COMPILE_R300
 498 #define READ_STENCIL( d, _x, _y )                                       \
 499 do {                                                                    \
 500    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 501    GLuint tmp = *_ptr;                          \
 502    d = tmp & 0x000000ff;                                                \
 503 } while (0)
 504 #elif defined(RADEON_COMMON_FOR_R200)
 505 #define READ_STENCIL( d, _x, _y )                                       \
 506 do {                                                                    \
 507    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );             \
 508    GLuint tmp = *_ptr;                          \
 509    d = (tmp & 0xff000000) >> 24;                                        \
 510 } while (0)
 511 #else
 512 #define READ_STENCIL( d, _x, _y )                                       \
 513 do {                                                                    \
 514    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 515    GLuint tmp = *_ptr;                          \
 516    d = (tmp & 0xff000000) >> 24;                                        \
 517 } while (0)
 518 #endif
 519
 520 #define TAG(x) radeon##x##_z24_s8
 521 #include "stenciltmp.h"
 522
 523
 524 static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
 525 {
 526         struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
 527         int r;
 528
 529         if (rrb == NULL || !rrb->bo)
 530                 return;
 531
 532         if (flag) {
 533                 if (rrb->bo->bom->funcs->bo_wait)
 534                         radeon_bo_wait(rrb->bo);
 535                 r = radeon_bo_map(rrb->bo, 1);
 536                 if (r) {
 537                         fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
 538                                 __FUNCTION__, r);
 539                 }
 540
 541                 radeonSetSpanFunctions(rrb);
 542         } else {
 543                 radeon_bo_unmap(rrb->bo);
 544                 rb->GetRow = NULL;
 545                 rb->PutRow = NULL;
 546         }
 547 }
 548
 549 static void
 550 radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
 551 {
 552         GLuint i, j;
 553
 554         /* color draw buffers */
 555         for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
 556                 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
 557
 558         /* check for render to textures */
 559         for (i = 0; i < BUFFER_COUNT; i++) {
 560                 struct gl_renderbuffer_attachment *att =
 561                         ctx->DrawBuffer->Attachment + i;
 562                 struct gl_texture_object *tex = att->Texture;
 563                 if (tex) {
 564                         /* Render to texture. Note that a mipmapped texture need not
 565                          * be complete for render to texture, so we must restrict to
 566                          * mapping only the attached image.
 567                          */
 568                         radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
 569                         ASSERT(att->Renderbuffer);
 570
 571                         if (map)
 572                                 radeon_teximage_map(image, GL_TRUE);
 573                         else
 574                                 radeon_teximage_unmap(image);
 575                 }
 576         }
 577
 578         map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
 579
 580         /* depth buffer (Note wrapper!) */
 581         if (ctx->DrawBuffer->_DepthBuffer)
 582                 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
 583
 584         if (ctx->DrawBuffer->_StencilBuffer)
 585                 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
 586 }
 587
 588 static void radeonSpanRenderStart(GLcontext * ctx)
 589 {
 590         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 591         int i;
 592
 593         radeon_firevertices(rmesa);
 594
 595         /* The locking and wait for idle should really only be needed in classic mode.
 596          * In a future memory manager based implementation, this should become
 597          * unnecessary due to the fact that mapping our buffers, textures, etc.
 598          * should implicitly wait for any previous rendering commands that must
 599          * be waited on. */
 600         if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
 601                 LOCK_HARDWARE(rmesa);
 602                 radeonWaitForIdleLocked(rmesa);
 603         }
 604
 605         for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
 606                 if (ctx->Texture.Unit[i]._ReallyEnabled)
 607                         ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
 608         }
 609
 610         radeon_map_unmap_buffers(ctx, 1);
 611 }
 612
 613 static void radeonSpanRenderFinish(GLcontext * ctx)
 614 {
 615         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 616         int i;
 617         _swrast_flush(ctx);
 618         if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
 619                 UNLOCK_HARDWARE(rmesa);
 620         }
 621         for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
 622                 if (ctx->Texture.Unit[i]._ReallyEnabled)
 623                         ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
 624         }
 625
 626         radeon_map_unmap_buffers(ctx, 0);
 627 }
 628
 629 void radeonInitSpanFuncs(GLcontext * ctx)
 630 {
 631         struct swrast_device_driver *swdd =
 632             _swrast_GetDeviceDriverReference(ctx);
 633         swdd->SpanRenderStart = radeonSpanRenderStart;
 634         swdd->SpanRenderFinish = radeonSpanRenderFinish;
 635 }
 636
 637 /**
 638  * Plug in the Get/Put routines for the given driRenderbuffer.
 639  */
 640 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
 641 {
 642         if (rrb->base._ActualFormat == GL_RGB5) {
 643                 radeonInitPointers_RGB565(&rrb->base);
 644         } else if (rrb->base._ActualFormat == GL_RGB8) {
 645                 radeonInitPointers_xRGB8888(&rrb->base);
 646         } else if (rrb->base._ActualFormat == GL_RGBA8) {
 647                 radeonInitPointers_ARGB8888(&rrb->base);
 648         } else if (rrb->base._ActualFormat == GL_RGBA4) {
 649                 radeonInitPointers_ARGB4444(&rrb->base);
 650         } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
 651                 radeonInitPointers_ARGB1555(&rrb->base);
 652         } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
 653                 radeonInitDepthPointers_z16(&rrb->base);
 654         } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
 655                 radeonInitDepthPointers_z24(&rrb->base);
 656         } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
 657                 radeonInitDepthPointers_z24_s8(&rrb->base);
 658         } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
 659                 radeonInitStencilPointers_z24_s8(&rrb->base);
 660         } else {
 661                 fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
 662         }
 663 }