1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c,v 1.6 2002/12/16 16:18:59 dawes Exp $ */
2 /**************************************************************************
3
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
6
7 All Rights Reserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 **************************************************************************/
30
31 /*
32 * Authors:
33 * Kevin E. Martin <martin@valinux.com>
34 * Gareth Hughes <gareth@valinux.com>
35 */
36
37 #include "glheader.h"
38 #include "imports.h"
39 #include "colormac.h"
40 #include "context.h"
41 #include "macros.h"
42 #include "texformat.h"
43 #include "enums.h"
44
45 #include "radeon_context.h"
46 #include "radeon_state.h"
47 #include "radeon_ioctl.h"
48 #include "radeon_swtcl.h"
49 #include "radeon_tex.h"
50 #include "radeon_tcl.h"
51
52
53 #define RADEON_TXFORMAT_A8 RADEON_TXFORMAT_I8
54 #define RADEON_TXFORMAT_L8 RADEON_TXFORMAT_I8
55 #define RADEON_TXFORMAT_AL88 RADEON_TXFORMAT_AI88
56 #define RADEON_TXFORMAT_YCBCR RADEON_TXFORMAT_YVYU422
57 #define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
58 #define RADEON_TXFORMAT_RGB_DXT1 RADEON_TXFORMAT_DXT1
59 #define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
60 #define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
61 #define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
62
63 #define _COLOR(f) \
64 [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
65 #define _COLOR_REV(f) \
66 [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 }
67 #define _ALPHA(f) \
68 [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
69 #define _ALPHA_REV(f) \
70 [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
71 #define _YUV(f) \
72 [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
73 #define _INVALID(f) \
74 [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
75 #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
76 && (tx_table[f].format != 0xffffffff) )
77
78 static const struct {
79 GLuint format, filter;
80 }
81 tx_table[] =
82 {
83 _ALPHA(RGBA8888),
84 _ALPHA_REV(RGBA8888),
85 _ALPHA(ARGB8888),
86 _ALPHA_REV(ARGB8888),
87 _INVALID(RGB888),
88 _COLOR(RGB565),
89 _COLOR_REV(RGB565),
90 _ALPHA(ARGB4444),
91 _ALPHA_REV(ARGB4444),
92 _ALPHA(ARGB1555),
93 _ALPHA_REV(ARGB1555),
94 _ALPHA(AL88),
95 _ALPHA_REV(AL88),
96 _ALPHA(A8),
97 _COLOR(L8),
98 _ALPHA(I8),
99 _INVALID(CI8),
100 _YUV(YCBCR),
101 _YUV(YCBCR_REV),
102 _INVALID(RGB_FXT1),
103 _INVALID(RGBA_FXT1),
104 _COLOR(RGB_DXT1),
105 _ALPHA(RGBA_DXT1),
106 _ALPHA(RGBA_DXT3),
107 _ALPHA(RGBA_DXT5),
108 };
109
110 #undef _COLOR
111 #undef _ALPHA
112 #undef _INVALID
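/* For illustration, _ALPHA(ARGB4444) above expands to the designated
 * initializer
 *   [ MESA_FORMAT_ARGB4444 ] = { RADEON_TXFORMAT_ARGB4444 |
 *                                RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
 * while _YUV(YCBCR) also sets RADEON_YUV_TO_RGB in the filter word, which
 * is what the unit-0-only YUV check in update_tex_common() later tests.
 */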
113
114 /**
115 * This function computes the number of bytes of storage needed for
116 * the given texture object (all mipmap levels, all cube faces).
117 * The \c image[face][level].x/y/width/height parameters for upload/blitting
118 * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here
119 * too.
120 *
121 * \param rmesa Context pointer
122 * \param tObj GL texture object whose images are to be posted to
123 * hardware state.
124 */
125 static void radeonSetTexImages( radeonContextPtr rmesa,
126 struct gl_texture_object *tObj )
127 {
128 radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
129 const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
130 GLint curOffset, blitWidth;
131 GLint i, texelBytes;
132 GLint numLevels;
133 GLint log2Width, log2Height, log2Depth;
134
135 /* Set the hardware texture format
136 */
137
138 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
139 RADEON_TXFORMAT_ALPHA_IN_MAP);
140 t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
141
142 if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
143 t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
144 t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
145 }
146 else {
147 _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
148 return;
149 }
150
151 texelBytes = baseImage->TexFormat->TexelBytes;
152
153 /* Compute which mipmap levels we really want to send to the hardware.
154 */
155
156 if (tObj->Target != GL_TEXTURE_CUBE_MAP)
157 driCalculateTextureFirstLastLevel( (driTextureObject *) t );
158 else {
159 /* r100 can't handle mipmaps for cube/3d textures, so don't waste
160 memory for them */
161 t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
162 }
163 log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
164 log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
165 log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
166
167 numLevels = t->base.lastLevel - t->base.firstLevel + 1;
168
169 assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
170
171 /* Calculate mipmap offsets and dimensions for blitting (uploading)
172 * The idea is that we lay out the mipmap levels within a block of
173 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
174 */
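/* Rough example of the uncompressed, untiled path below: a 16x16
 * RGBA8888 level has texelBytes = 4, so w = (16*4 + 31) & ~31 = 64 and
 * size = 64 * 16 = 1024 bytes; the level is then described to the
 * blitter as a 16-texel-wide, 16-row rectangle at the next 32-byte
 * aligned curOffset.  Exact numbers depend on BLIT_WIDTH_BYTES and the
 * tiling bits chosen below, so treat this as an illustration only.
 */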
175 curOffset = 0;
176 blitWidth = BLIT_WIDTH_BYTES;
177 t->tile_bits = 0;
178
179 /* figure out if this texture is suitable for tiling. */
180 if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
181 if (rmesa->texmicrotile && (baseImage->Height > 1)) {
 182 /* allow at most a 32 (bytes) x 1 mip (which will use twice the space
 183 the non-tiled version would use) if the base texture is large enough */
184 if ((numLevels == 1) ||
185 (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
186 (baseImage->Width * texelBytes > 64)) ||
187 ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
188 /* R100 has two microtile bits (only the txoffset reg, not the blitter)
189 weird: X2 + OPT: 32bit correct, 16bit completely hosed
190 X2: 32bit correct, 16bit correct
191 OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
192 t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
193 }
194 }
195 if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
 196 /* R100 disables macro tiling automatically only if the mip width is smaller than 256
 197 bytes, and (not 100% sure) not if the height is smaller than 16, just as the r200 does,
 198 so we need to disable macro tiling ourselves in that case */
199 if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
200 t->tile_bits |= RADEON_TXO_MACRO_TILE;
201 }
202 }
203 }
204
205 for (i = 0; i < numLevels; i++) {
206 const struct gl_texture_image *texImage;
207 GLuint size;
208
209 texImage = tObj->Image[0][i + t->base.firstLevel];
210 if ( !texImage )
211 break;
212
213 /* find image size in bytes */
214 if (texImage->IsCompressed) {
 215 /* need to calculate the size AFTER padding even though the texture is
 216 submitted without padding.
 217 Only handle pot textures currently - don't know if npot is even possible,
 218 the size calculation would certainly need (trivial) adjustments.
 219 Align (and later pad) to 32 bytes; not sure what that 64-byte blit width is
 220 good for? */
221 if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
222 /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
223 if ((texImage->Width + 3) < 8) /* width one block */
224 size = texImage->CompressedSize * 4;
225 else if ((texImage->Width + 3) < 16)
226 size = texImage->CompressedSize * 2;
227 else size = texImage->CompressedSize;
228 }
229 else /* DXT3/5, 16 bytes per block */
230 if ((texImage->Width + 3) < 8)
231 size = texImage->CompressedSize * 2;
232 else size = texImage->CompressedSize;
233 }
234 else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
235 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
236 }
237 else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
 238 /* tile pattern is 16 bytes x 2. mipmaps stay 32-byte aligned,
 239 though the actual offset may differ from the untiled case
 240 (if the texture is less than 32 bytes wide) */
241 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
242 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
243 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
244 }
245 else {
246 int w = (texImage->Width * texelBytes + 31) & ~31;
247 size = w * texImage->Height * texImage->Depth;
248 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
249 }
250 assert(size > 0);
251
252 /* Align to 32-byte offset. It is faster to do this unconditionally
253 * (no branch penalty).
254 */
255
256 curOffset = (curOffset + 0x1f) & ~0x1f;
257
258 if (texelBytes) {
259 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
260 t->image[0][i].y = 0;
261 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
262 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
263 }
264 else {
265 t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
266 t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
267 t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
268 t->image[0][i].height = size / t->image[0][i].width;
269 }
270
271 #if 0
272 /* for debugging only and only applicable to non-rectangle targets */
273 assert(size % t->image[0][i].width == 0);
274 assert(t->image[0][i].x == 0
275 || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
276 #endif
277
278 if (0)
279 fprintf(stderr,
280 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
281 i, texImage->Width, texImage->Height,
282 t->image[0][i].x, t->image[0][i].y,
283 t->image[0][i].width, t->image[0][i].height, size, curOffset);
284
285 curOffset += size;
286
287 }
288
289 /* Align the total size of texture memory block.
290 */
291 t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
292
293 /* Setup remaining cube face blits, if needed */
294 if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
295 const GLuint faceSize = t->base.totalSize;
296 GLuint face;
297 /* reuse face 0 x/y/width/height - just update the offset when uploading */
298 for (face = 1; face < 6; face++) {
299 for (i = 0; i < numLevels; i++) {
300 t->image[face][i].x = t->image[0][i].x;
301 t->image[face][i].y = t->image[0][i].y;
302 t->image[face][i].width = t->image[0][i].width;
303 t->image[face][i].height = t->image[0][i].height;
304 }
305 }
306 t->base.totalSize = 6 * faceSize; /* total texmem needed */
307 }
308
309 /* Hardware state:
310 */
311 t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
312 t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
313
314 t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
315 RADEON_TXFORMAT_HEIGHT_MASK |
316 RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
317 RADEON_TXFORMAT_F5_WIDTH_MASK |
318 RADEON_TXFORMAT_F5_HEIGHT_MASK);
319 t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
320 (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
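/* e.g. a 256x128 base level gives log2Width = 8 and log2Height = 7 here;
 * the rectangle target instead relies on pp_txsize/pp_txpitch set below.
 */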
321
322 if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
323 assert(log2Width == log2Height);
324 t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
325 (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
326 (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
327 t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
328 (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
329 (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
330 (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
331 (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
332 (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
333 (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
334 (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
335 }
336
337 t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
338 ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
339
340 /* Only need to round to nearest 32 for textures, but the blitter
341 * requires 64-byte aligned pitches, and we may/may not need the
342 * blitter. NPOT only!
343 */
344 if (baseImage->IsCompressed)
345 t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
346 else
347 t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
348 t->pp_txpitch -= 32;
349
350 t->dirty_state = TEX_ALL;
351
352 /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
353 }
354
355
356
357 /* ================================================================
358 * Texture combine functions
359 */
360
361 /* GL_ARB_texture_env_combine support
362 */
363
364 /* The color tables have combine functions for GL_SRC_COLOR,
365 * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
366 */
367 static GLuint radeon_texture_color[][RADEON_MAX_TEXTURE_UNITS] =
368 {
369 {
370 RADEON_COLOR_ARG_A_T0_COLOR,
371 RADEON_COLOR_ARG_A_T1_COLOR,
372 RADEON_COLOR_ARG_A_T2_COLOR
373 },
374 {
375 RADEON_COLOR_ARG_A_T0_COLOR | RADEON_COMP_ARG_A,
376 RADEON_COLOR_ARG_A_T1_COLOR | RADEON_COMP_ARG_A,
377 RADEON_COLOR_ARG_A_T2_COLOR | RADEON_COMP_ARG_A
378 },
379 {
380 RADEON_COLOR_ARG_A_T0_ALPHA,
381 RADEON_COLOR_ARG_A_T1_ALPHA,
382 RADEON_COLOR_ARG_A_T2_ALPHA
383 },
384 {
385 RADEON_COLOR_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
386 RADEON_COLOR_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
387 RADEON_COLOR_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
388 },
389 };
390
391 static GLuint radeon_tfactor_color[] =
392 {
393 RADEON_COLOR_ARG_A_TFACTOR_COLOR,
394 RADEON_COLOR_ARG_A_TFACTOR_COLOR | RADEON_COMP_ARG_A,
395 RADEON_COLOR_ARG_A_TFACTOR_ALPHA,
396 RADEON_COLOR_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
397 };
398
399 static GLuint radeon_primary_color[] =
400 {
401 RADEON_COLOR_ARG_A_DIFFUSE_COLOR,
402 RADEON_COLOR_ARG_A_DIFFUSE_COLOR | RADEON_COMP_ARG_A,
403 RADEON_COLOR_ARG_A_DIFFUSE_ALPHA,
404 RADEON_COLOR_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
405 };
406
407 static GLuint radeon_previous_color[] =
408 {
409 RADEON_COLOR_ARG_A_CURRENT_COLOR,
410 RADEON_COLOR_ARG_A_CURRENT_COLOR | RADEON_COMP_ARG_A,
411 RADEON_COLOR_ARG_A_CURRENT_ALPHA,
412 RADEON_COLOR_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
413 };
414
415 /* GL_ZERO table - indices 0-3
416 * GL_ONE table - indices 1-4
417 */
418 static GLuint radeon_zero_color[] =
419 {
420 RADEON_COLOR_ARG_A_ZERO,
421 RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
422 RADEON_COLOR_ARG_A_ZERO,
423 RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
424 RADEON_COLOR_ARG_A_ZERO
425 };
426
427
428 /* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
429 */
430 static GLuint radeon_texture_alpha[][RADEON_MAX_TEXTURE_UNITS] =
431 {
432 {
433 RADEON_ALPHA_ARG_A_T0_ALPHA,
434 RADEON_ALPHA_ARG_A_T1_ALPHA,
435 RADEON_ALPHA_ARG_A_T2_ALPHA
436 },
437 {
438 RADEON_ALPHA_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
439 RADEON_ALPHA_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
440 RADEON_ALPHA_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
441 },
442 };
443
444 static GLuint radeon_tfactor_alpha[] =
445 {
446 RADEON_ALPHA_ARG_A_TFACTOR_ALPHA,
447 RADEON_ALPHA_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
448 };
449
450 static GLuint radeon_primary_alpha[] =
451 {
452 RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA,
453 RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
454 };
455
456 static GLuint radeon_previous_alpha[] =
457 {
458 RADEON_ALPHA_ARG_A_CURRENT_ALPHA,
459 RADEON_ALPHA_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
460 };
461
462 /* GL_ZERO table - indices 0-1
463 * GL_ONE table - indices 1-2
464 */
465 static GLuint radeon_zero_alpha[] =
466 {
467 RADEON_ALPHA_ARG_A_ZERO,
468 RADEON_ALPHA_ARG_A_ZERO | RADEON_COMP_ARG_A,
469 RADEON_ALPHA_ARG_A_ZERO
470 };
471
472
473 /* Extract the arg from slot A, shift it into the correct argument slot
474 * and set the corresponding complement bit.
475 */
476 #define RADEON_COLOR_ARG( n, arg ) \
477 do { \
478 color_combine |= \
479 ((color_arg[n] & RADEON_COLOR_ARG_MASK) \
480 << RADEON_COLOR_ARG_##arg##_SHIFT); \
481 color_combine |= \
482 ((color_arg[n] >> RADEON_COMP_ARG_SHIFT) \
483 << RADEON_COMP_ARG_##arg##_SHIFT); \
484 } while (0)
485
486 #define RADEON_ALPHA_ARG( n, arg ) \
487 do { \
488 alpha_combine |= \
489 ((alpha_arg[n] & RADEON_ALPHA_ARG_MASK) \
490 << RADEON_ALPHA_ARG_##arg##_SHIFT); \
491 alpha_combine |= \
492 ((alpha_arg[n] >> RADEON_COMP_ARG_SHIFT) \
493 << RADEON_COMP_ARG_##arg##_SHIFT); \
494 } while (0)
495
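/* For instance, RADEON_COLOR_ARG( 0, C ) moves the low RADEON_COLOR_ARG_MASK
 * bits of color_arg[0] into the C argument slot of color_combine, and moves
 * a RADEON_COMP_ARG_A flag (if the table entry carried one) to the matching
 * RADEON_COMP_ARG_C position.
 */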
496
497 /* ================================================================
498 * Texture unit state management
499 */
500
501 static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
502 {
503 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
504 const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
505 GLuint color_combine, alpha_combine;
506 const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
507 | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
508 | RADEON_SCALE_1X | RADEON_CLAMP_TX;
509 const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
510 | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
511 | RADEON_SCALE_1X | RADEON_CLAMP_TX;
512
513
514 /* texUnit->_Current can be NULL if and only if the texture unit is
515 * not actually enabled.
516 */
517 assert( (texUnit->_ReallyEnabled == 0)
518 || (texUnit->_Current != NULL) );
519
520 if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
521 fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
522 }
523
524 /* Set the texture environment state. Isn't this nice and clean?
525 * The chip will automagically set the texture alpha to 0xff when
526 * the texture format does not include an alpha component. This
527 * reduces the amount of special-casing we have to do, alpha-only
528 * textures being a notable exception.
529 */
530 /* Don't cache these results.
531 */
532 rmesa->state.texture.unit[unit].format = 0;
533 rmesa->state.texture.unit[unit].envMode = 0;
534
535 if ( !texUnit->_ReallyEnabled ) {
536 color_combine = color_combine0;
537 alpha_combine = alpha_combine0;
538 }
539 else {
540 GLuint color_arg[3], alpha_arg[3];
541 GLuint i;
542 const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
543 const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
544 GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
545 GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
546
547
548 /* Step 1:
549 * Extract the color and alpha combine function arguments.
550 */
551 for ( i = 0 ; i < numColorArgs ; i++ ) {
552 const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
553 const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
554 assert(op >= 0);
555 assert(op <= 3);
556 switch ( srcRGBi ) {
557 case GL_TEXTURE:
558 color_arg[i] = radeon_texture_color[op][unit];
559 break;
560 case GL_CONSTANT:
561 color_arg[i] = radeon_tfactor_color[op];
562 break;
563 case GL_PRIMARY_COLOR:
564 color_arg[i] = radeon_primary_color[op];
565 break;
566 case GL_PREVIOUS:
567 color_arg[i] = radeon_previous_color[op];
568 break;
569 case GL_ZERO:
570 color_arg[i] = radeon_zero_color[op];
571 break;
572 case GL_ONE:
573 color_arg[i] = radeon_zero_color[op+1];
574 break;
575 case GL_TEXTURE0:
576 case GL_TEXTURE1:
577 case GL_TEXTURE2:
 578 /* implement ogl 1.4/1.5 core spec here, not the specification of
 579 * GL_ARB_texture_env_crossbar (which would require disabling blending
 580 * instead of undefined results when referencing a disabled texture unit) */
581 color_arg[i] = radeon_texture_color[op][srcRGBi - GL_TEXTURE0];
582 break;
583 default:
584 return GL_FALSE;
585 }
586 }
587
588 for ( i = 0 ; i < numAlphaArgs ; i++ ) {
589 const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
590 const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
591 assert(op >= 0);
592 assert(op <= 1);
593 switch ( srcAi ) {
594 case GL_TEXTURE:
595 alpha_arg[i] = radeon_texture_alpha[op][unit];
596 break;
597 case GL_CONSTANT:
598 alpha_arg[i] = radeon_tfactor_alpha[op];
599 break;
600 case GL_PRIMARY_COLOR:
601 alpha_arg[i] = radeon_primary_alpha[op];
602 break;
603 case GL_PREVIOUS:
604 alpha_arg[i] = radeon_previous_alpha[op];
605 break;
606 case GL_ZERO:
607 alpha_arg[i] = radeon_zero_alpha[op];
608 break;
609 case GL_ONE:
610 alpha_arg[i] = radeon_zero_alpha[op+1];
611 break;
612 case GL_TEXTURE0:
613 case GL_TEXTURE1:
614 case GL_TEXTURE2:
615 alpha_arg[i] = radeon_texture_alpha[op][srcAi - GL_TEXTURE0];
616 break;
617 default:
618 return GL_FALSE;
619 }
620 }
621
622 /* Step 2:
623 * Build up the color and alpha combine functions.
624 */
625 switch ( texUnit->_CurrentCombine->ModeRGB ) {
626 case GL_REPLACE:
627 color_combine = (RADEON_COLOR_ARG_A_ZERO |
628 RADEON_COLOR_ARG_B_ZERO |
629 RADEON_BLEND_CTL_ADD |
630 RADEON_CLAMP_TX);
631 RADEON_COLOR_ARG( 0, C );
632 break;
633 case GL_MODULATE:
634 color_combine = (RADEON_COLOR_ARG_C_ZERO |
635 RADEON_BLEND_CTL_ADD |
636 RADEON_CLAMP_TX);
637 RADEON_COLOR_ARG( 0, A );
638 RADEON_COLOR_ARG( 1, B );
639 break;
640 case GL_ADD:
641 color_combine = (RADEON_COLOR_ARG_B_ZERO |
642 RADEON_COMP_ARG_B |
643 RADEON_BLEND_CTL_ADD |
644 RADEON_CLAMP_TX);
645 RADEON_COLOR_ARG( 0, A );
646 RADEON_COLOR_ARG( 1, C );
647 break;
648 case GL_ADD_SIGNED:
649 color_combine = (RADEON_COLOR_ARG_B_ZERO |
650 RADEON_COMP_ARG_B |
651 RADEON_BLEND_CTL_ADDSIGNED |
652 RADEON_CLAMP_TX);
653 RADEON_COLOR_ARG( 0, A );
654 RADEON_COLOR_ARG( 1, C );
655 break;
656 case GL_SUBTRACT:
657 color_combine = (RADEON_COLOR_ARG_B_ZERO |
658 RADEON_COMP_ARG_B |
659 RADEON_BLEND_CTL_SUBTRACT |
660 RADEON_CLAMP_TX);
661 RADEON_COLOR_ARG( 0, A );
662 RADEON_COLOR_ARG( 1, C );
663 break;
664 case GL_INTERPOLATE:
665 color_combine = (RADEON_BLEND_CTL_BLEND |
666 RADEON_CLAMP_TX);
667 RADEON_COLOR_ARG( 0, B );
668 RADEON_COLOR_ARG( 1, A );
669 RADEON_COLOR_ARG( 2, C );
670 break;
671
672 case GL_DOT3_RGB_EXT:
673 case GL_DOT3_RGBA_EXT:
674 /* The EXT version of the DOT3 extension does not support the
675 * scale factor, but the ARB version (and the version in OpenGL
676 * 1.3) does.
677 */
678 RGBshift = 0;
679 /* FALLTHROUGH */
680
681 case GL_DOT3_RGB:
682 case GL_DOT3_RGBA:
683 /* The R100 / RV200 only support a 1X multiplier in hardware
684 * w/the ARB version.
685 */
686 if ( RGBshift != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) ) {
687 return GL_FALSE;
688 }
689
690 RGBshift += 2;
691 if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
692 || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
693 /* is it necessary to set this or will it be ignored anyway? */
694 Ashift = RGBshift;
695 }
696
697 color_combine = (RADEON_COLOR_ARG_C_ZERO |
698 RADEON_BLEND_CTL_DOT3 |
699 RADEON_CLAMP_TX);
700 RADEON_COLOR_ARG( 0, A );
701 RADEON_COLOR_ARG( 1, B );
702 break;
703
704 case GL_MODULATE_ADD_ATI:
705 color_combine = (RADEON_BLEND_CTL_ADD |
706 RADEON_CLAMP_TX);
707 RADEON_COLOR_ARG( 0, A );
708 RADEON_COLOR_ARG( 1, C );
709 RADEON_COLOR_ARG( 2, B );
710 break;
711 case GL_MODULATE_SIGNED_ADD_ATI:
712 color_combine = (RADEON_BLEND_CTL_ADDSIGNED |
713 RADEON_CLAMP_TX);
714 RADEON_COLOR_ARG( 0, A );
715 RADEON_COLOR_ARG( 1, C );
716 RADEON_COLOR_ARG( 2, B );
717 break;
718 case GL_MODULATE_SUBTRACT_ATI:
719 color_combine = (RADEON_BLEND_CTL_SUBTRACT |
720 RADEON_CLAMP_TX);
721 RADEON_COLOR_ARG( 0, A );
722 RADEON_COLOR_ARG( 1, C );
723 RADEON_COLOR_ARG( 2, B );
724 break;
725 default:
726 return GL_FALSE;
727 }
728
729 switch ( texUnit->_CurrentCombine->ModeA ) {
730 case GL_REPLACE:
731 alpha_combine = (RADEON_ALPHA_ARG_A_ZERO |
732 RADEON_ALPHA_ARG_B_ZERO |
733 RADEON_BLEND_CTL_ADD |
734 RADEON_CLAMP_TX);
735 RADEON_ALPHA_ARG( 0, C );
736 break;
737 case GL_MODULATE:
738 alpha_combine = (RADEON_ALPHA_ARG_C_ZERO |
739 RADEON_BLEND_CTL_ADD |
740 RADEON_CLAMP_TX);
741 RADEON_ALPHA_ARG( 0, A );
742 RADEON_ALPHA_ARG( 1, B );
743 break;
744 case GL_ADD:
745 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
746 RADEON_COMP_ARG_B |
747 RADEON_BLEND_CTL_ADD |
748 RADEON_CLAMP_TX);
749 RADEON_ALPHA_ARG( 0, A );
750 RADEON_ALPHA_ARG( 1, C );
751 break;
752 case GL_ADD_SIGNED:
753 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
754 RADEON_COMP_ARG_B |
755 RADEON_BLEND_CTL_ADDSIGNED |
756 RADEON_CLAMP_TX);
757 RADEON_ALPHA_ARG( 0, A );
758 RADEON_ALPHA_ARG( 1, C );
759 break;
760 case GL_SUBTRACT:
 761 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
762 RADEON_COMP_ARG_B |
763 RADEON_BLEND_CTL_SUBTRACT |
764 RADEON_CLAMP_TX);
765 RADEON_ALPHA_ARG( 0, A );
766 RADEON_ALPHA_ARG( 1, C );
767 break;
768 case GL_INTERPOLATE:
769 alpha_combine = (RADEON_BLEND_CTL_BLEND |
770 RADEON_CLAMP_TX);
771 RADEON_ALPHA_ARG( 0, B );
772 RADEON_ALPHA_ARG( 1, A );
773 RADEON_ALPHA_ARG( 2, C );
774 break;
775
776 case GL_MODULATE_ADD_ATI:
777 alpha_combine = (RADEON_BLEND_CTL_ADD |
778 RADEON_CLAMP_TX);
779 RADEON_ALPHA_ARG( 0, A );
780 RADEON_ALPHA_ARG( 1, C );
781 RADEON_ALPHA_ARG( 2, B );
782 break;
783 case GL_MODULATE_SIGNED_ADD_ATI:
784 alpha_combine = (RADEON_BLEND_CTL_ADDSIGNED |
785 RADEON_CLAMP_TX);
786 RADEON_ALPHA_ARG( 0, A );
787 RADEON_ALPHA_ARG( 1, C );
788 RADEON_ALPHA_ARG( 2, B );
789 break;
790 case GL_MODULATE_SUBTRACT_ATI:
791 alpha_combine = (RADEON_BLEND_CTL_SUBTRACT |
792 RADEON_CLAMP_TX);
793 RADEON_ALPHA_ARG( 0, A );
794 RADEON_ALPHA_ARG( 1, C );
795 RADEON_ALPHA_ARG( 2, B );
796 break;
797 default:
798 return GL_FALSE;
799 }
800
801 if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB_EXT)
802 || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB) ) {
803 alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE;
804 }
805
806 /* Step 3:
807 * Apply the scale factor.
808 */
809 color_combine |= (RGBshift << RADEON_SCALE_SHIFT);
810 alpha_combine |= (Ashift << RADEON_SCALE_SHIFT);
811
812 /* All done!
813 */
814 }
815
816 if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
817 rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
818 RADEON_STATECHANGE( rmesa, tex[unit] );
819 rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
820 rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
821 }
822
823 return GL_TRUE;
824 }
825
826 #define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \
827 RADEON_MIN_FILTER_MASK | \
828 RADEON_MAG_FILTER_MASK | \
829 RADEON_MAX_ANISO_MASK | \
830 RADEON_YUV_TO_RGB | \
831 RADEON_YUV_TEMPERATURE_MASK | \
832 RADEON_CLAMP_S_MASK | \
833 RADEON_CLAMP_T_MASK | \
834 RADEON_BORDER_MODE_D3D )
835
836 #define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK | \
837 RADEON_TXFORMAT_HEIGHT_MASK | \
838 RADEON_TXFORMAT_FORMAT_MASK | \
839 RADEON_TXFORMAT_F5_WIDTH_MASK | \
840 RADEON_TXFORMAT_F5_HEIGHT_MASK | \
841 RADEON_TXFORMAT_ALPHA_IN_MAP | \
842 RADEON_TXFORMAT_CUBIC_MAP_ENABLE | \
843 RADEON_TXFORMAT_NON_POWER2)
844
845
846 static void import_tex_obj_state( radeonContextPtr rmesa,
847 int unit,
848 radeonTexObjPtr texobj )
849 {
850 GLuint *cmd = RADEON_DB_STATE( tex[unit] );
851
852 cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
853 cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
854 cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
855 cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
856 cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
857 cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
858
859 if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
860 GLuint *cube_cmd = RADEON_DB_STATE( cube[unit] );
861 GLuint bytesPerFace = texobj->base.totalSize / 6;
862 ASSERT(texobj->base.totalSize % 6 == 0);
863
864 cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
 865 /* don't know if this setup conforms to OpenGL...
 866 * at least it matches the behavior of the mesa software renderer
867 */
868 cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
869 cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
870 cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
871 cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
872 cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
873 RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
874 cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
875 }
876 else if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
877 GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
878 txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
879 txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
880 RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] );
881 }
882
883 RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
884
885 texobj->dirty_state &= ~(1<<unit);
886 }
887
888
889
890
891 static void set_texgen_matrix( radeonContextPtr rmesa,
892 GLuint unit,
893 const GLfloat *s_plane,
894 const GLfloat *t_plane,
895 const GLfloat *r_plane,
896 const GLfloat *q_plane )
897 {
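/* Mesa matrices are stored column-major, so writing the S plane into
 * m[0], m[4], m[8], m[12] makes it row 0 of TexGenMatrix: multiplying
 * the matrix with an object/eye-space coordinate yields (s, t, r, q).
 */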
898 rmesa->TexGenMatrix[unit].m[0] = s_plane[0];
899 rmesa->TexGenMatrix[unit].m[4] = s_plane[1];
900 rmesa->TexGenMatrix[unit].m[8] = s_plane[2];
901 rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
902
903 rmesa->TexGenMatrix[unit].m[1] = t_plane[0];
904 rmesa->TexGenMatrix[unit].m[5] = t_plane[1];
905 rmesa->TexGenMatrix[unit].m[9] = t_plane[2];
906 rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
907
908 rmesa->TexGenMatrix[unit].m[2] = r_plane[0];
909 rmesa->TexGenMatrix[unit].m[6] = r_plane[1];
910 rmesa->TexGenMatrix[unit].m[10] = r_plane[2];
911 rmesa->TexGenMatrix[unit].m[14] = r_plane[3];
912
913 rmesa->TexGenMatrix[unit].m[3] = q_plane[0];
914 rmesa->TexGenMatrix[unit].m[7] = q_plane[1];
915 rmesa->TexGenMatrix[unit].m[11] = q_plane[2];
916 rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
917
918 rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
919 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
920 }
921
922 /* Returns GL_FALSE if fallback required.
923 */
924 static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
925 {
926 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
927 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
928 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
929 GLuint tmp = rmesa->TexGenEnabled;
930 static const GLfloat reflect[16] = {
931 -1, 0, 0, 0,
932 0, -1, 0, 0,
933 0, 0, -1, 0,
934 0, 0, 0, 1 };
935
936 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE << unit);
937 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE << unit);
938 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK << inputshift);
939 rmesa->TexGenNeedNormals[unit] = 0;
940
941 if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT|Q_BIT)) == 0) {
942 /* Disabled, no fallback:
943 */
944 rmesa->TexGenEnabled |=
945 (RADEON_TEXGEN_INPUT_TEXCOORD_0 + unit) << inputshift;
946 return GL_TRUE;
947 }
 948 /* the r100 cannot do texgen for some coords while leaving others alone;
 949 * we do not detect such cases (certainly can't do it here) and just
 950 * ASSUME that when S and T have texgen enabled we do not need the other,
 951 * non-texgen coords, no matter whether the R and Q bits have texgen
 952 * enabled. Still check for mixed texgen modes across all coords.
953 */
954 else if ( (texUnit->TexGenEnabled & S_BIT) &&
955 (texUnit->TexGenEnabled & T_BIT) &&
956 (texUnit->GenModeS == texUnit->GenModeT) ) {
957 if ( ((texUnit->TexGenEnabled & R_BIT) &&
958 (texUnit->GenModeS != texUnit->GenModeR)) ||
959 ((texUnit->TexGenEnabled & Q_BIT) &&
960 (texUnit->GenModeS != texUnit->GenModeQ)) ) {
961 /* Mixed modes, fallback:
962 */
963 if (RADEON_DEBUG & DEBUG_FALLBACKS)
964 fprintf(stderr, "fallback mixed texgen\n");
965 return GL_FALSE;
966 }
967 rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
968 }
969 else {
970 /* some texgen mode not including both S and T bits */
971 if (RADEON_DEBUG & DEBUG_FALLBACKS)
972 fprintf(stderr, "fallback mixed texgen/nontexgen\n");
973 return GL_FALSE;
974 }
975
976 if ((texUnit->TexGenEnabled & (R_BIT | Q_BIT)) != 0) {
 977 /* need this here for vtxfmt presumably. Argh, we need to set
 978 this from way too many places; it would be much easier if we could leave
 979 the tcl q coord always enabled, as on r200 */
980 RADEON_STATECHANGE( rmesa, tcl );
981 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit);
982 }
983
984 switch (texUnit->GenModeS) {
985 case GL_OBJECT_LINEAR:
986 rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
987 set_texgen_matrix( rmesa, unit,
988 texUnit->ObjectPlaneS,
989 texUnit->ObjectPlaneT,
990 texUnit->ObjectPlaneR,
991 texUnit->ObjectPlaneQ);
992 break;
993
994 case GL_EYE_LINEAR:
995 rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
996 set_texgen_matrix( rmesa, unit,
997 texUnit->EyePlaneS,
998 texUnit->EyePlaneT,
999 texUnit->EyePlaneR,
1000 texUnit->EyePlaneQ);
1001 break;
1002
1003 case GL_REFLECTION_MAP_NV:
1004 rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1005 rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT << inputshift;
1006 /* TODO: unknown if this is needed/correct */
1007 set_texgen_matrix( rmesa, unit, reflect, reflect + 4,
1008 reflect + 8, reflect + 12 );
1009 break;
1010
1011 case GL_NORMAL_MAP_NV:
1012 rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1013 rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL << inputshift;
1014 break;
1015
1016 case GL_SPHERE_MAP:
1017 /* the mode which everyone uses :-( */
1018 default:
1019 /* Unsupported mode, fallback:
1020 */
1021 if (RADEON_DEBUG & DEBUG_FALLBACKS)
1022 fprintf(stderr, "fallback GL_SPHERE_MAP\n");
1023 return GL_FALSE;
1024 }
1025
1026 if (tmp != rmesa->TexGenEnabled) {
1027 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1028 }
1029
1030 return GL_TRUE;
1031 }
1032
1033
1034 static void disable_tex( GLcontext *ctx, int unit )
1035 {
1036 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1037
1038 if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
 1039 /* Unit is still enabled in the hardware state; disable it now */
1040 if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
1041 /* The old texture is no longer bound to this texture unit.
1042 * Mark it as such.
1043 */
1044
1045 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
1046 rmesa->state.texture.unit[unit].texobj = NULL;
1047 }
1048
1049 RADEON_STATECHANGE( rmesa, ctx );
1050 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &=
1051 ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
1052
1053 RADEON_STATECHANGE( rmesa, tcl );
1054 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
1055 RADEON_Q_BIT(unit));
1056
1057 if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
1058 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
1059 rmesa->recheck_texgen[unit] = GL_TRUE;
1060 }
1061
1062 if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
1063 /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
1064 cubic_map bit on unit 2 when the unit is disabled, otherwise every
1065 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
 1066 units, better safe than sorry though). */
1067 RADEON_STATECHANGE( rmesa, tex[unit] );
1068 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
1069 }
1070
1071 {
1072 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
1073 GLuint tmp = rmesa->TexGenEnabled;
1074
1075 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
1076 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
1077 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
1078 rmesa->TexGenNeedNormals[unit] = 0;
1079 rmesa->TexGenEnabled |=
1080 (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
1081
1082 if (tmp != rmesa->TexGenEnabled) {
1083 rmesa->recheck_texgen[unit] = GL_TRUE;
1084 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1085 }
1086 }
1087 }
1088 }
1089
1090 static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
1091 {
1092 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1093 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1094 struct gl_texture_object *tObj = texUnit->_Current;
1095 radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1096
1097 /* Need to load the 2d images associated with this unit.
1098 */
1099 if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
1100 t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
1101 t->base.dirty_images[0] = ~0;
1102 }
1103
1104 ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
1105
1106 if ( t->base.dirty_images[0] ) {
1107 RADEON_FIREVERTICES( rmesa );
1108 radeonSetTexImages( rmesa, tObj );
1109 radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
1110 if ( !t->base.memBlock )
1111 return GL_FALSE;
1112 }
1113
1114 return GL_TRUE;
1115 }
1116
1117 static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
1118 {
1119 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1120 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1121 struct gl_texture_object *tObj = texUnit->_Current;
1122 radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1123 GLuint face;
1124
1125 /* Need to load the 2d images associated with this unit.
1126 */
1127 if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
1128 t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
1129 for (face = 0; face < 6; face++)
1130 t->base.dirty_images[face] = ~0;
1131 }
1132
1133 ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
1134
1135 if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
1136 t->base.dirty_images[2] || t->base.dirty_images[3] ||
1137 t->base.dirty_images[4] || t->base.dirty_images[5] ) {
1138 /* flush */
1139 RADEON_FIREVERTICES( rmesa );
1140 /* layout memory space, once for all faces */
1141 radeonSetTexImages( rmesa, tObj );
1142 }
1143
1144 /* upload (per face) */
1145 for (face = 0; face < 6; face++) {
1146 if (t->base.dirty_images[face]) {
1147 radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
1148 }
1149 }
1150
1151 if ( !t->base.memBlock ) {
1152 /* texmem alloc failed, use s/w fallback */
1153 return GL_FALSE;
1154 }
1155
1156 return GL_TRUE;
1157 }
1158
1159 static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
1160 {
1161 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1162 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1163 struct gl_texture_object *tObj = texUnit->_Current;
1164 radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1165
1166 if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
1167 t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
1168 t->base.dirty_images[0] = ~0;
1169 }
1170
1171 ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
1172
1173 if ( t->base.dirty_images[0] ) {
1174 RADEON_FIREVERTICES( rmesa );
1175 radeonSetTexImages( rmesa, tObj );
1176 radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
1177 if ( !t->base.memBlock /* && !rmesa->prefer_gart_client_texturing FIXME */ ) {
1178 fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
1179 return GL_FALSE;
1180 }
1181 }
1182
1183 return GL_TRUE;
1184 }
1185
1186
1187 static GLboolean update_tex_common( GLcontext *ctx, int unit )
1188 {
1189 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1190 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1191 struct gl_texture_object *tObj = texUnit->_Current;
1192 radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1193 GLenum format;
1194
1195 /* Fallback if there's a texture border */
1196 if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
1197 fprintf(stderr, "%s: border\n", __FUNCTION__);
1198 return GL_FALSE;
1199 }
1200 /* yuv conversion only works in first unit */
1201 if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
1202 return GL_FALSE;
1203
1204 /* Update state if this is a different texture object to last
1205 * time.
1206 */
1207 if ( rmesa->state.texture.unit[unit].texobj != t ) {
1208 if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
1209 /* The old texture is no longer bound to this texture unit.
1210 * Mark it as such.
1211 */
1212
1213 rmesa->state.texture.unit[unit].texobj->base.bound &=
1214 ~(1UL << unit);
1215 }
1216
1217 rmesa->state.texture.unit[unit].texobj = t;
1218 t->base.bound |= (1UL << unit);
1219 t->dirty_state |= 1<<unit;
1220 driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
1221 }
1222
1223
1224 /* Newly enabled?
1225 */
1226 if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
1227 RADEON_STATECHANGE( rmesa, ctx );
1228 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
1229 (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
1230
1231 RADEON_STATECHANGE( rmesa, tcl );
1232
1233 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
1234
1235 rmesa->recheck_texgen[unit] = GL_TRUE;
1236 }
1237
1238 if (t->dirty_state & (1<<unit)) {
1239 import_tex_obj_state( rmesa, unit, t );
1240 /* may need to update texture matrix (for texrect adjustments) */
1241 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1242 }
1243
1244 if (rmesa->recheck_texgen[unit]) {
1245 GLboolean fallback = !radeon_validate_texgen( ctx, unit );
1246 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
1247 rmesa->recheck_texgen[unit] = 0;
1248 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1249 }
1250
1251 format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
1252 if ( rmesa->state.texture.unit[unit].format != format ||
1253 rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
1254 rmesa->state.texture.unit[unit].format = format;
1255 rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
1256 if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
1257 return GL_FALSE;
1258 }
1259 }
1260
1261 FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
1262 return !t->border_fallback;
1263 }
1264
1265
1266
1267 static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
1268 {
1269 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1270
1271 if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
1272 return (enable_tex_rect( ctx, unit ) &&
1273 update_tex_common( ctx, unit ));
1274 }
1275 else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
1276 return (enable_tex_2d( ctx, unit ) &&
1277 update_tex_common( ctx, unit ));
1278 }
1279 else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
1280 return (enable_tex_cube( ctx, unit ) &&
1281 update_tex_common( ctx, unit ));
1282 }
1283 else if ( texUnit->_ReallyEnabled ) {
1284 return GL_FALSE;
1285 }
1286 else {
1287 disable_tex( ctx, unit );
1288 return GL_TRUE;
1289 }
1290 }
1291
1292 void radeonUpdateTextureState( GLcontext *ctx )
1293 {
1294 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1295 GLboolean ok;
1296
1297 ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
1298 radeonUpdateTextureUnit( ctx, 1 ) &&
1299 radeonUpdateTextureUnit( ctx, 2 ));
1300
1301 FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
1302
1303 if (rmesa->TclFallback)
1304 radeonChooseVertexState( ctx );
1305 }