src/mesa/drivers/dri/radeon/radeon_span.c

   1 /**************************************************************************
   2
   3 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
   4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   5                      VA Linux Systems Inc., Fremont, California.
   6
   7 The Weather Channel (TM) funded Tungsten Graphics to develop the
   8 initial release of the Radeon 8500 driver under the XFree86 license.
   9 This notice must be preserved.
  10
  11 All Rights Reserved.
  12
  13 Permission is hereby granted, free of charge, to any person obtaining
  14 a copy of this software and associated documentation files (the
  15 "Software"), to deal in the Software without restriction, including
  16 without limitation the rights to use, copy, modify, merge, publish,
  17 distribute, sublicense, and/or sell copies of the Software, and to
  18 permit persons to whom the Software is furnished to do so, subject to
  19 the following conditions:
  20
  21 The above copyright notice and this permission notice (including the
  22 next paragraph) shall be included in all copies or substantial
  23 portions of the Software.
  24
  25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  32
  33 **************************************************************************/
  34
  35 /*
  36  * Authors:
  37  *   Kevin E. Martin <martin@valinux.com>
  38  *   Gareth Hughes <gareth@valinux.com>
  39  *   Keith Whitwell <keith@tungstengraphics.com>
  40  *
  41  */
  42
  43 #include "main/glheader.h"
  44 #include "swrast/swrast.h"
  45
  46 #include "radeon_common.h"
  47 #include "radeon_lock.h"
  48 #include "radeon_span.h"
  49
  50 #define DBG 0
  51
  52 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
  53
  54
  55 /* r200 depth buffer is always tiled - this is the formula
  56    according to the docs unless I typo'ed in it
  57 */
  58 #if defined(RADEON_COMMON_FOR_R200)
  59 static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
  60                                  GLint x, GLint y)
  61 {
  62     GLubyte *ptr = rrb->bo->ptr;
  63     GLint offset;
  64     if (rrb->has_surface) {
  65         offset = x * rrb->cpp + y * rrb->pitch;
  66     } else {
  67         GLuint b;
  68         offset = 0;
  69         b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
  70         offset += (b >> 1) << 12;
  71         offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
  72         offset += ((y >> 2) & 0x3) << 9;
  73         offset += ((x >> 3) & 0x1) << 8;
  74         offset += ((x >> 4) & 0x3) << 6;
  75         offset += ((x >> 2) & 0x1) << 5;
  76         offset += ((y >> 1) & 0x1) << 4;
  77         offset += ((x >> 1) & 0x1) << 3;
  78         offset += (y & 0x1) << 2;
  79         offset += (x & 0x1) << 1;
  80     }
  81     return &ptr[offset];
  82 }
  83
  84 static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
  85                                  GLint x, GLint y)
  86 {
  87     GLubyte *ptr = rrb->bo->ptr;
  88     GLint offset;
  89     if (rrb->has_surface) {
  90         offset = x * rrb->cpp + y * rrb->pitch;
  91     } else {
  92         GLuint b;
  93         offset = 0;
  94         b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
  95         offset += (b >> 1) << 12;
  96         offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
  97         offset += ((y >> 2) & 0x3) << 9;
  98         offset += ((x >> 2) & 0x1) << 8;
  99         offset += ((x >> 3) & 0x3) << 6;
 100         offset += ((y >> 1) & 0x1) << 5;
 101         offset += ((x >> 1) & 0x1) << 4;
 102         offset += (y & 0x1) << 3;
 103         offset += (x & 0x1) << 2;
 104     }
 105     return &ptr[offset];
 106 }
 107 #endif
 108
 109 /* radeon tiling on r300-r500 has 4 states,
 110    macro-linear/micro-linear
 111    macro-linear/micro-tiled
 112    macro-tiled /micro-linear
 113    macro-tiled /micro-tiled
 114    1 byte surface
 115    2 byte surface - two types - we only provide 8x2 microtiling
 116    4 byte surface
 117    8/16 byte (unused)
 118 */
 119 static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
 120                              GLint x, GLint y)
 121 {
 122     GLubyte *ptr = rrb->bo->ptr;
 123     uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
 124     GLint offset;
 125
 126     if (rrb->has_surface || !(rrb->bo->flags & mask)) {
 127         offset = x * rrb->cpp + y * rrb->pitch;
 128     } else {
 129         offset = 0;
 130         if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
 131             if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
 132                 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
 133                 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
 134                 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
 135                 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
 136                 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
 137                 offset += ((y >> 1) & 0x1) << 6;
 138                 offset += ((x >> 2) & 0x1) << 5;
 139                 offset += (y & 1) << 4;
 140                 offset += (x & 3) << 2;
 141             } else {
 142                 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
 143                 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
 144                 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
 145                 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
 146                 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
 147                 offset += (y & 1) << 6;
 148                 offset += (x & 15) << 2;
 149             }
 150         } else {
 151             offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
 152             offset += (y & 1) << 4;
 153             offset += (x & 3) << 2;
 154         }
 155     }
 156     return &ptr[offset];
 157 }
 158
 159 static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
 160                                      GLint x, GLint y)
 161 {
 162     GLubyte *ptr = rrb->bo->ptr;
 163     uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
 164     GLint offset;
 165
 166     if (rrb->has_surface || !(rrb->bo->flags & mask)) {
 167         offset = x * rrb->cpp + y * rrb->pitch;
 168     } else {
 169         offset = 0;
 170         if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
 171             if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
 172                 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
 173                 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
 174                 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
 175                 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
 176                 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
 177                 offset += ((y >> 1) & 0x1) << 6;
 178                 offset += ((x >> 3) & 0x1) << 5;
 179                 offset += (y & 1) << 4;
 180                 offset += (x & 3) << 2;
 181             } else {
 182                 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
 183                 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
 184                 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
 185                 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
 186                 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
 187                 offset += (y & 1) << 6;
 188                 offset += ((x >> 4) & 0x1) << 5;
 189                 offset += (x & 15) << 2;
 190             }
 191         } else {
 192             offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
 193             offset += (y & 0x1) << 4;
 194             offset += (x & 0x7) << 1;
 195         }
 196     }
 197     return &ptr[offset];
 198 }
 199
 200 #ifndef COMPILE_R300
 201 static uint32_t
 202 z24s8_to_s8z24(uint32_t val)
 203 {
 204    return (val << 24) | (val >> 8);
 205 }
 206
 207 static uint32_t
 208 s8z24_to_z24s8(uint32_t val)
 209 {
 210    return (val >> 24) | (val << 8);
 211 }
 212 #endif
 213
 214 /*
 215  * Note that all information needed to access pixels in a renderbuffer
 216  * should be obtained through the gl_renderbuffer parameter, not per-context
 217  * information.
 218  */
 219 #define LOCAL_VARS                                              \
 220    struct radeon_context *radeon = RADEON_CONTEXT(ctx);                 \
 221    struct radeon_renderbuffer *rrb = (void *) rb;               \
 222    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;                 \
 223    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
 224    unsigned int num_cliprects;                                          \
 225    struct drm_clip_rect *cliprects;                                     \
 226    int x_off, y_off;                                                    \
 227    GLuint p;                                            \
 228    (void)p;                                             \
 229    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
 230
 231 #define LOCAL_DEPTH_VARS                                \
 232    struct radeon_context *radeon = RADEON_CONTEXT(ctx);                 \
 233    struct radeon_renderbuffer *rrb = (void *) rb;       \
 234    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;                 \
 235    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
 236    unsigned int num_cliprects;                                          \
 237    struct drm_clip_rect *cliprects;                                     \
 238    int x_off, y_off;                                                    \
 239   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
 240
 241 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 242
 243 #define Y_FLIP(_y) ((_y) * yScale + yBias)
 244
 245 #define HW_LOCK()
 246
 247 #define HW_UNLOCK()
 248
 249 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
 250  * the cliprect info from the context, not the driDrawable.
 251  * Move this into spantmp2.h someday.
 252  */
 253 #define HW_CLIPLOOP()                                                   \
 254    do {                                                                 \
 255       int _nc = num_cliprects;                                          \
 256       while ( _nc-- ) {                                                 \
 257          int minx = cliprects[_nc].x1 - x_off;                          \
 258          int miny = cliprects[_nc].y1 - y_off;                          \
 259          int maxx = cliprects[_nc].x2 - x_off;                          \
 260          int maxy = cliprects[_nc].y2 - y_off;
 261
 262 /* ================================================================
 263  * Color buffer
 264  */
 265
 266 /* 16 bit, RGB565 color spanline and pixel functions
 267  */
 268 #define SPANTMP_PIXEL_FMT GL_RGB
 269 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
 270
 271 #define TAG(x)    radeon##x##_RGB565
 272 #define TAG2(x,y) radeon##x##_RGB565##y
 273 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
 274 #include "spantmp2.h"
 275
 276 /* 16 bit, ARGB1555 color spanline and pixel functions
 277  */
 278 #define SPANTMP_PIXEL_FMT GL_BGRA
 279 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
 280
 281 #define TAG(x)    radeon##x##_ARGB1555
 282 #define TAG2(x,y) radeon##x##_ARGB1555##y
 283 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
 284 #include "spantmp2.h"
 285
 286 /* 16 bit, RGBA4 color spanline and pixel functions
 287  */
 288 #define SPANTMP_PIXEL_FMT GL_BGRA
 289 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
 290
 291 #define TAG(x)    radeon##x##_ARGB4444
 292 #define TAG2(x,y) radeon##x##_ARGB4444##y
 293 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
 294 #include "spantmp2.h"
 295
 296 /* 32 bit, xRGB8888 color spanline and pixel functions
 297  */
 298 #define SPANTMP_PIXEL_FMT GL_BGRA
 299 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
 300
 301 #define TAG(x)    radeon##x##_xRGB8888
 302 #define TAG2(x,y) radeon##x##_xRGB8888##y
 303 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
 304 #define PUT_VALUE(_x, _y, d) { \
 305    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 306    *_ptr = d;                                                           \
 307 } while (0)
 308 #include "spantmp2.h"
 309
 310 /* 32 bit, ARGB8888 color spanline and pixel functions
 311  */
 312 #define SPANTMP_PIXEL_FMT GL_BGRA
 313 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
 314
 315 #define TAG(x)    radeon##x##_ARGB8888
 316 #define TAG2(x,y) radeon##x##_ARGB8888##y
 317 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
 318 #define PUT_VALUE(_x, _y, d) { \
 319    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 320    *_ptr = d;                                                           \
 321 } while (0)
 322 #include "spantmp2.h"
 323
 324 /* ================================================================
 325  * Depth buffer
 326  */
 327
 328 /* The Radeon family has depth tiling on all the time, so we have to convert
 329  * the x,y coordinates into the memory bus address (mba) in the same
 330  * manner as the engine.  In each case, the linear block address (ba)
 331  * is calculated, and then wired with x and y to produce the final
 332  * memory address.
 333  * The chip will do address translation on its own if the surface registers
 334  * are set up correctly. It is not quite enough to get it working with hyperz
 335  * too...
 336  */
 337
 338 /* 16-bit depth buffer functions
 339  */
 340 #define VALUE_TYPE GLushort
 341
 342 #if defined(RADEON_COMMON_FOR_R200)
 343 #define WRITE_DEPTH( _x, _y, d )                                        \
 344    *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
 345 #else
 346 #define WRITE_DEPTH( _x, _y, d )                                        \
 347    *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
 348 #endif
 349
 350 #if defined(RADEON_COMMON_FOR_R200)
 351 #define READ_DEPTH( d, _x, _y )                                         \
 352    d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
 353 #else
 354 #define READ_DEPTH( d, _x, _y )                                         \
 355    d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
 356 #endif
 357
 358 #define TAG(x) radeon##x##_z16
 359 #include "depthtmp.h"
 360
 361 /* 24 bit depth
 362  *
 363  * Careful: It looks like the R300 uses ZZZS byte order while the R200
 364  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 365  */
 366 #define VALUE_TYPE GLuint
 367
 368 #if defined(COMPILE_R300)
 369 #define WRITE_DEPTH( _x, _y, d )                                        \
 370 do {                                                                    \
 371    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 372    GLuint tmp = *_ptr;                          \
 373    tmp &= 0x000000ff;                                                   \
 374    tmp |= ((d << 8) & 0xffffff00);                                      \
 375    *_ptr = tmp;                                 \
 376 } while (0)
 377 #elif defined(RADEON_COMMON_FOR_R200)
 378 #define WRITE_DEPTH( _x, _y, d )                                        \
 379 do {                                                                    \
 380    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );             \
 381    GLuint tmp = *_ptr;                          \
 382    tmp &= 0xff000000;                                                   \
 383    tmp |= ((d) & 0x00ffffff);                                           \
 384    *_ptr = tmp;                                 \
 385 } while (0)
 386 #else
 387 #define WRITE_DEPTH( _x, _y, d )                                        \
 388 do {                                                                    \
 389    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );     \
 390    GLuint tmp = *_ptr;                                                  \
 391    tmp &= 0xff000000;                                                   \
 392    tmp |= ((d) & 0x00ffffff);                                           \
 393    *_ptr = tmp;                                 \
 394 } while (0)
 395 #endif
 396
 397 #if defined(COMPILE_R300)
 398 #define READ_DEPTH( d, _x, _y )                                         \
 399   do {                                                                  \
 400     d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
 401   }while(0)
 402 #elif defined(RADEON_COMMON_FOR_R200)
 403 #define READ_DEPTH( d, _x, _y )                                         \
 404   do {                                                                  \
 405     d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
 406   }while(0)
 407 #else
 408 #define READ_DEPTH( d, _x, _y ) \
 409   d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,      _y + y_off)) & 0x00ffffff;
 410 #endif
 411
 412 #define TAG(x) radeon##x##_z24
 413 #include "depthtmp.h"
 414
 415 /* 24 bit depth, 8 bit stencil depthbuffer functions
 416  * EXT_depth_stencil
 417  *
 418  * Careful: It looks like the R300 uses ZZZS byte order while the R200
 419  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 420  */
 421 #define VALUE_TYPE GLuint
 422
 423 #if defined(COMPILE_R300)
 424 #define WRITE_DEPTH( _x, _y, d )                                        \
 425 do {                                                                    \
 426    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 427    *_ptr = d;                                                           \
 428 } while (0)
 429 #elif defined(RADEON_COMMON_FOR_R200)
 430 #define WRITE_DEPTH( _x, _y, d )                                        \
 431 do {                                                                    \
 432    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );             \
 433    GLuint tmp = z24s8_to_s8z24(d);                                      \
 434    *_ptr = tmp;                                                         \
 435 } while (0)
 436 #else
 437 #define WRITE_DEPTH( _x, _y, d )                                        \
 438 do {                                                                    \
 439    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );     \
 440    GLuint tmp = z24s8_to_s8z24(d);                                      \
 441    *_ptr = tmp;                                 \
 442 } while (0)
 443 #endif
 444
 445 #if defined(COMPILE_R300)
 446 #define READ_DEPTH( d, _x, _y )                                         \
 447   do { \
 448     d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));    \
 449   }while(0)
 450 #elif defined(RADEON_COMMON_FOR_R200)
 451 #define READ_DEPTH( d, _x, _y )                                         \
 452   do { \
 453     d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)));      \
 454   }while(0)
 455 #else
 456 #define READ_DEPTH( d, _x, _y ) do {                                    \
 457     d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,     _y + y_off ))); \
 458   } while (0)
 459 #endif
 460
 461 #define TAG(x) radeon##x##_z24_s8
 462 #include "depthtmp.h"
 463
 464 /* ================================================================
 465  * Stencil buffer
 466  */
 467
 468 /* 24 bit depth, 8 bit stencil depthbuffer functions
 469  */
 470 #ifdef COMPILE_R300
 471 #define WRITE_STENCIL( _x, _y, d )                                      \
 472 do {                                                                    \
 473    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);               \
 474    GLuint tmp = *_ptr;                          \
 475    tmp &= 0xffffff00;                                                   \
 476    tmp |= (d) & 0xff;                                                   \
 477    *_ptr = tmp;                                 \
 478 } while (0)
 479 #elif defined(RADEON_COMMON_FOR_R200)
 480 #define WRITE_STENCIL( _x, _y, d )                                      \
 481 do {                                                                    \
 482    GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);               \
 483    GLuint tmp = *_ptr;                          \
 484    tmp &= 0x00ffffff;                                                   \
 485    tmp |= (((d) & 0xff) << 24);                                         \
 486    *_ptr = tmp;                                 \
 487 } while (0)
 488 #else
 489 #define WRITE_STENCIL( _x, _y, d )                                      \
 490 do {                                                                    \
 491    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);               \
 492    GLuint tmp = *_ptr;                          \
 493    tmp &= 0x00ffffff;                                                   \
 494    tmp |= (((d) & 0xff) << 24);                                         \
 495    *_ptr = tmp;                                 \
 496 } while (0)
 497 #endif
 498
 499 #ifdef COMPILE_R300
 500 #define READ_STENCIL( d, _x, _y )                                       \
 501 do {                                                                    \
 502    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 503    GLuint tmp = *_ptr;                          \
 504    d = tmp & 0x000000ff;                                                \
 505 } while (0)
 506 #elif defined(RADEON_COMMON_FOR_R200)
 507 #define READ_STENCIL( d, _x, _y )                                       \
 508 do {                                                                    \
 509    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );             \
 510    GLuint tmp = *_ptr;                          \
 511    d = (tmp & 0xff000000) >> 24;                                        \
 512 } while (0)
 513 #else
 514 #define READ_STENCIL( d, _x, _y )                                       \
 515 do {                                                                    \
 516    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );             \
 517    GLuint tmp = *_ptr;                          \
 518    d = (tmp & 0xff000000) >> 24;                                        \
 519 } while (0)
 520 #endif
 521
 522 #define TAG(x) radeon##x##_z24_s8
 523 #include "stenciltmp.h"
 524
 525
 526 static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
 527 {
 528         struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
 529         int r;
 530
 531         if (rrb == NULL || !rrb->bo)
 532                 return;
 533
 534         if (flag) {
 535                 if (rrb->bo->bom->funcs->bo_wait)
 536                         radeon_bo_wait(rrb->bo);
 537                 r = radeon_bo_map(rrb->bo, 1);
 538                 if (r) {
 539                         fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
 540                                 __FUNCTION__, r);
 541                 }
 542
 543                 radeonSetSpanFunctions(rrb);
 544         } else {
 545                 radeon_bo_unmap(rrb->bo);
 546                 rb->GetRow = NULL;
 547                 rb->PutRow = NULL;
 548         }
 549 }
 550
 551 static void
 552 radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
 553 {
 554         GLuint i, j;
 555
 556         /* color draw buffers */
 557         for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
 558                 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
 559
 560         /* check for render to textures */
 561         for (i = 0; i < BUFFER_COUNT; i++) {
 562                 struct gl_renderbuffer_attachment *att =
 563                         ctx->DrawBuffer->Attachment + i;
 564                 struct gl_texture_object *tex = att->Texture;
 565                 if (tex) {
 566                         /* Render to texture. Note that a mipmapped texture need not
 567                          * be complete for render to texture, so we must restrict to
 568                          * mapping only the attached image.
 569                          */
 570                         radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
 571                         ASSERT(att->Renderbuffer);
 572
 573                         if (map)
 574                                 radeon_teximage_map(image, GL_TRUE);
 575                         else
 576                                 radeon_teximage_unmap(image);
 577                 }
 578         }
 579
 580         map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
 581
 582         /* depth buffer (Note wrapper!) */
 583         if (ctx->DrawBuffer->_DepthBuffer)
 584                 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
 585
 586         if (ctx->DrawBuffer->_StencilBuffer)
 587                 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
 588 }
 589
 590 static void radeonSpanRenderStart(GLcontext * ctx)
 591 {
 592         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 593         int i;
 594
 595         radeon_firevertices(rmesa);
 596
 597         /* The locking and wait for idle should really only be needed in classic mode.
 598          * In a future memory manager based implementation, this should become
 599          * unnecessary due to the fact that mapping our buffers, textures, etc.
 600          * should implicitly wait for any previous rendering commands that must
 601          * be waited on. */
 602         if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
 603                 LOCK_HARDWARE(rmesa);
 604                 radeonWaitForIdleLocked(rmesa);
 605         }
 606
 607         for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
 608                 if (ctx->Texture.Unit[i]._ReallyEnabled)
 609                         ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
 610         }
 611
 612         radeon_map_unmap_buffers(ctx, 1);
 613 }
 614
 615 static void radeonSpanRenderFinish(GLcontext * ctx)
 616 {
 617         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 618         int i;
 619         _swrast_flush(ctx);
 620         if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
 621                 UNLOCK_HARDWARE(rmesa);
 622         }
 623         for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
 624                 if (ctx->Texture.Unit[i]._ReallyEnabled)
 625                         ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
 626         }
 627
 628         radeon_map_unmap_buffers(ctx, 0);
 629 }
 630
 631 void radeonInitSpanFuncs(GLcontext * ctx)
 632 {
 633         struct swrast_device_driver *swdd =
 634             _swrast_GetDeviceDriverReference(ctx);
 635         swdd->SpanRenderStart = radeonSpanRenderStart;
 636         swdd->SpanRenderFinish = radeonSpanRenderFinish;
 637 }
 638
 639 /**
 640  * Plug in the Get/Put routines for the given driRenderbuffer.
 641  */
 642 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
 643 {
 644         if (rrb->base._ActualFormat == GL_RGB5) {
 645                 radeonInitPointers_RGB565(&rrb->base);
 646         } else if (rrb->base._ActualFormat == GL_RGB8) {
 647                 radeonInitPointers_xRGB8888(&rrb->base);
 648         } else if (rrb->base._ActualFormat == GL_RGBA8) {
 649                 radeonInitPointers_ARGB8888(&rrb->base);
 650         } else if (rrb->base._ActualFormat == GL_RGBA4) {
 651                 radeonInitPointers_ARGB4444(&rrb->base);
 652         } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
 653                 radeonInitPointers_ARGB1555(&rrb->base);
 654         } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
 655                 radeonInitDepthPointers_z16(&rrb->base);
 656         } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
 657                 radeonInitDepthPointers_z24(&rrb->base);
 658         } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
 659                 radeonInitDepthPointers_z24_s8(&rrb->base);
 660         } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
 661                 radeonInitStencilPointers_z24_s8(&rrb->base);
 662         } else {
 663                 fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
 664         }
 665 }