src/mesa/drivers/dri/radeon/radeon_texstate.c

   1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c,v 1.6 2002/12/16 16:18:59 dawes Exp $ */
   2 /**************************************************************************
   3
   4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   5                      VA Linux Systems Inc., Fremont, California.
   6
   7 All Rights Reserved.
   8
   9 Permission is hereby granted, free of charge, to any person obtaining
  10 a copy of this software and associated documentation files (the
  11 "Software"), to deal in the Software without restriction, including
  12 without limitation the rights to use, copy, modify, merge, publish,
  13 distribute, sublicense, and/or sell copies of the Software, and to
  14 permit persons to whom the Software is furnished to do so, subject to
  15 the following conditions:
  16
  17 The above copyright notice and this permission notice (including the
  18 next paragraph) shall be included in all copies or substantial
  19 portions of the Software.
  20
  21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28
  29 **************************************************************************/
  30
  31 /*
  32  * Authors:
  33  *   Kevin E. Martin <martin@valinux.com>
  34  *   Gareth Hughes <gareth@valinux.com>
  35  */
  36
  37 #include "glheader.h"
  38 #include "imports.h"
  39 #include "colormac.h"
  40 #include "context.h"
  41 #include "macros.h"
  42 #include "texformat.h"
  43 #include "enums.h"
  44
  45 #include "radeon_context.h"
  46 #include "radeon_state.h"
  47 #include "radeon_ioctl.h"
  48 #include "radeon_swtcl.h"
  49 #include "radeon_tex.h"
  50 #include "radeon_tcl.h"
  51
  52
  53 #define RADEON_TXFORMAT_A8        RADEON_TXFORMAT_I8
  54 #define RADEON_TXFORMAT_L8        RADEON_TXFORMAT_I8
  55 #define RADEON_TXFORMAT_AL88      RADEON_TXFORMAT_AI88
  56 #define RADEON_TXFORMAT_YCBCR     RADEON_TXFORMAT_YVYU422
  57 #define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
  58 #define RADEON_TXFORMAT_RGB_DXT1  RADEON_TXFORMAT_DXT1
  59 #define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
  60 #define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
  61 #define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
  62
  63 #define _COLOR(f) \
  64     [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
  65 #define _COLOR_REV(f) \
  66     [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 }
  67 #define _ALPHA(f) \
  68     [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
  69 #define _ALPHA_REV(f) \
  70     [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
  71 #define _YUV(f) \
  72    [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
  73 #define _INVALID(f) \
  74     [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
  75 #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
  76                              && (tx_table[f].format != 0xffffffff) )
  77
  78 static const struct {
  79    GLuint format, filter;
  80 }
  81 tx_table[] =
  82 {
  83    _ALPHA(RGBA8888),
  84    _ALPHA_REV(RGBA8888),
  85    _ALPHA(ARGB8888),
  86    _ALPHA_REV(ARGB8888),
  87    _INVALID(RGB888),
  88    _COLOR(RGB565),
  89    _COLOR_REV(RGB565),
  90    _ALPHA(ARGB4444),
  91    _ALPHA_REV(ARGB4444),
  92    _ALPHA(ARGB1555),
  93    _ALPHA_REV(ARGB1555),
  94    _ALPHA(AL88),
  95    _ALPHA_REV(AL88),
  96    _ALPHA(A8),
  97    _COLOR(L8),
  98    _ALPHA(I8),
  99    _INVALID(CI8),
 100    _YUV(YCBCR),
 101    _YUV(YCBCR_REV),
 102    _INVALID(RGB_FXT1),
 103    _INVALID(RGBA_FXT1),
 104    _COLOR(RGB_DXT1),
 105    _ALPHA(RGBA_DXT1),
 106    _ALPHA(RGBA_DXT3),
 107    _ALPHA(RGBA_DXT5),
 108 };
 109
 110 #undef _COLOR
 111 #undef _ALPHA
 112 #undef _INVALID
 113
 114 /**
 115  * This function computes the number of bytes of storage needed for
 116  * the given texture object (all mipmap levels, all cube faces).
 117  * The \c image[face][level].x/y/width/height parameters for upload/blitting
 118  * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
 119  * too.
 120  *
 121  * \param rmesa Context pointer
 122  * \param tObj GL texture object whose images are to be posted to
 123  *                 hardware state.
 124  */
 125 static void radeonSetTexImages( radeonContextPtr rmesa,
 126                                 struct gl_texture_object *tObj )
 127 {
 128    radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
 129    const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
 130    GLint curOffset, blitWidth;
 131    GLint i, texelBytes;
 132    GLint numLevels;
 133    GLint log2Width, log2Height, log2Depth;
 134
 135    /* Set the hardware texture format
 136     */
 137
 138    t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
 139                        RADEON_TXFORMAT_ALPHA_IN_MAP);
 140    t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
 141
 142    if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
 143       t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
 144       t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
 145    }
 146    else {
 147       _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
 148       return;
 149    }
 150
 151    texelBytes = baseImage->TexFormat->TexelBytes;
 152
 153    /* Compute which mipmap levels we really want to send to the hardware.
 154     */
 155
 156    if (tObj->Target != GL_TEXTURE_CUBE_MAP)
 157       driCalculateTextureFirstLastLevel( (driTextureObject *) t );
 158    else {
 159       /* r100 can't handle mipmaps for cube/3d textures, so don't waste
 160          memory for them */
 161       t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
 162    }
 163    log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
 164    log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
 165    log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
 166
 167    numLevels = t->base.lastLevel - t->base.firstLevel + 1;
 168
 169    assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
 170
 171    /* Calculate mipmap offsets and dimensions for blitting (uploading)
 172     * The idea is that we lay out the mipmap levels within a block of
 173     * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
 174     */
 175    curOffset = 0;
 176    blitWidth = BLIT_WIDTH_BYTES;
 177    t->tile_bits = 0;
 178
 179    /* figure out if this texture is suitable for tiling. */
 180    if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
 181       if (rmesa->texmicrotile && (baseImage->Height > 1)) {
 182          /* allow 32 (bytes) x 1 mip (which will use two times the space
 183             the non-tiled version would use) max if base texture is large enough */
 184          if ((numLevels == 1) ||
 185            (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
 186                (baseImage->Width * texelBytes > 64)) ||
 187             ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
 188             /* R100 has two microtile bits (only the txoffset reg, not the blitter)
 189                weird: X2 + OPT: 32bit correct, 16bit completely hosed
 190                       X2: 32bit correct, 16bit correct
 191                       OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
 192             t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
 193          }
 194       }
 195       if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
 196          /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
 197             in the case if height is smaller than 16 (not 100% sure), as does the r200,
 198             so need to disable macro tiling in that case */
 199          if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
 200             t->tile_bits |= RADEON_TXO_MACRO_TILE;
 201          }
 202       }
 203    }
 204
 205    for (i = 0; i < numLevels; i++) {
 206       const struct gl_texture_image *texImage;
 207       GLuint size;
 208
 209       texImage = tObj->Image[0][i + t->base.firstLevel];
 210       if ( !texImage )
 211          break;
 212
 213       /* find image size in bytes */
 214       if (texImage->IsCompressed) {
 215       /* need to calculate the size AFTER padding even though the texture is
 216          submitted without padding.
 217          Only handle pot textures currently - don't know if npot is even possible,
 218          size calculation would certainly need (trivial) adjustments.
 219          Align (and later pad) to 32byte, not sure what that 64byte blit width is
 220          good for? */
 221          if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
 222             /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
 223             if ((texImage->Width + 3) < 8) /* width one block */
 224                size = texImage->CompressedSize * 4;
 225             else if ((texImage->Width + 3) < 16)
 226                size = texImage->CompressedSize * 2;
 227             else size = texImage->CompressedSize;
 228          }
 229          else /* DXT3/5, 16 bytes per block */
 230             if ((texImage->Width + 3) < 8)
 231                size = texImage->CompressedSize * 2;
 232             else size = texImage->CompressedSize;
 233       }
 234       else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
 235          size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
 236       }
 237       else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
 238          /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
 239             though the actual offset may be different (if texture is less than
 240             32 bytes width) to the untiled case */
 241          int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
 242          size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
 243          blitWidth = MAX2(texImage->Width, 64 / texelBytes);
 244       }
 245       else {
 246          int w = (texImage->Width * texelBytes + 31) & ~31;
 247          size = w * texImage->Height * texImage->Depth;
 248          blitWidth = MAX2(texImage->Width, 64 / texelBytes);
 249       }
 250       assert(size > 0);
 251
 252       /* Align to 32-byte offset.  It is faster to do this unconditionally
 253        * (no branch penalty).
 254        */
 255
 256       curOffset = (curOffset + 0x1f) & ~0x1f;
 257
 258       if (texelBytes) {
 259          t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
 260          t->image[0][i].y = 0;
 261          t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
 262          t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
 263       }
 264       else {
 265          t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
 266          t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
 267          t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
 268          t->image[0][i].height = size / t->image[0][i].width;
 269       }
 270
 271 #if 0
 272       /* for debugging only and only  applicable to non-rectangle targets */
 273       assert(size % t->image[0][i].width == 0);
 274       assert(t->image[0][i].x == 0
 275              || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
 276 #endif
 277
 278       if (0)
 279          fprintf(stderr,
 280                  "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
 281                  i, texImage->Width, texImage->Height,
 282                  t->image[0][i].x, t->image[0][i].y,
 283                  t->image[0][i].width, t->image[0][i].height, size, curOffset);
 284
 285       curOffset += size;
 286
 287    }
 288
 289    /* Align the total size of texture memory block.
 290     */
 291    t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
 292
 293    /* Setup remaining cube face blits, if needed */
 294    if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
 295       const GLuint faceSize = t->base.totalSize;
 296       GLuint face;
 297       /* reuse face 0 x/y/width/height - just update the offset when uploading */
 298       for (face = 1; face < 6; face++) {
 299          for (i = 0; i < numLevels; i++) {
 300             t->image[face][i].x =  t->image[0][i].x;
 301             t->image[face][i].y =  t->image[0][i].y;
 302             t->image[face][i].width  = t->image[0][i].width;
 303             t->image[face][i].height = t->image[0][i].height;
 304          }
 305       }
 306       t->base.totalSize = 6 * faceSize; /* total texmem needed */
 307    }
 308
 309    /* Hardware state:
 310     */
 311    t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
 312    t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
 313
 314    t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
 315                        RADEON_TXFORMAT_HEIGHT_MASK |
 316                        RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
 317                        RADEON_TXFORMAT_F5_WIDTH_MASK |
 318                        RADEON_TXFORMAT_F5_HEIGHT_MASK);
 319    t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
 320                       (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
 321
 322    if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
 323       assert(log2Width == log2Height);
 324       t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
 325                          (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
 326                          (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
 327       t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
 328                            (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
 329                            (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
 330                            (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
 331                            (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
 332                            (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
 333                            (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
 334                            (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
 335    }
 336
 337    t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
 338                    ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
 339
 340    /* Only need to round to nearest 32 for textures, but the blitter
 341     * requires 64-byte aligned pitches, and we may/may not need the
 342     * blitter.   NPOT only!
 343     */
 344    if (baseImage->IsCompressed)
 345       t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
 346    else
 347       t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
 348    t->pp_txpitch -= 32;
 349
 350    t->dirty_state = TEX_ALL;
 351
 352    /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
 353 }
 354
 355
 356
 357 /* ================================================================
 358  * Texture combine functions
 359  */
 360
 361 /* GL_ARB_texture_env_combine support
 362  */
 363
 364 /* The color tables have combine functions for GL_SRC_COLOR,
 365  * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
 366  */
 367 static GLuint radeon_texture_color[][RADEON_MAX_TEXTURE_UNITS] =
 368 {
 369    {
 370       RADEON_COLOR_ARG_A_T0_COLOR,
 371       RADEON_COLOR_ARG_A_T1_COLOR,
 372       RADEON_COLOR_ARG_A_T2_COLOR
 373    },
 374    {
 375       RADEON_COLOR_ARG_A_T0_COLOR | RADEON_COMP_ARG_A,
 376       RADEON_COLOR_ARG_A_T1_COLOR | RADEON_COMP_ARG_A,
 377       RADEON_COLOR_ARG_A_T2_COLOR | RADEON_COMP_ARG_A
 378    },
 379    {
 380       RADEON_COLOR_ARG_A_T0_ALPHA,
 381       RADEON_COLOR_ARG_A_T1_ALPHA,
 382       RADEON_COLOR_ARG_A_T2_ALPHA
 383    },
 384    {
 385       RADEON_COLOR_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
 386       RADEON_COLOR_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
 387       RADEON_COLOR_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
 388    },
 389 };
 390
 391 static GLuint radeon_tfactor_color[] =
 392 {
 393    RADEON_COLOR_ARG_A_TFACTOR_COLOR,
 394    RADEON_COLOR_ARG_A_TFACTOR_COLOR | RADEON_COMP_ARG_A,
 395    RADEON_COLOR_ARG_A_TFACTOR_ALPHA,
 396    RADEON_COLOR_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
 397 };
 398
 399 static GLuint radeon_primary_color[] =
 400 {
 401    RADEON_COLOR_ARG_A_DIFFUSE_COLOR,
 402    RADEON_COLOR_ARG_A_DIFFUSE_COLOR | RADEON_COMP_ARG_A,
 403    RADEON_COLOR_ARG_A_DIFFUSE_ALPHA,
 404    RADEON_COLOR_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
 405 };
 406
 407 static GLuint radeon_previous_color[] =
 408 {
 409    RADEON_COLOR_ARG_A_CURRENT_COLOR,
 410    RADEON_COLOR_ARG_A_CURRENT_COLOR | RADEON_COMP_ARG_A,
 411    RADEON_COLOR_ARG_A_CURRENT_ALPHA,
 412    RADEON_COLOR_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
 413 };
 414
 415 /* GL_ZERO table - indices 0-3
 416  * GL_ONE  table - indices 1-4
 417  */
 418 static GLuint radeon_zero_color[] =
 419 {
 420    RADEON_COLOR_ARG_A_ZERO,
 421    RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
 422    RADEON_COLOR_ARG_A_ZERO,
 423    RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
 424    RADEON_COLOR_ARG_A_ZERO
 425 };
 426
 427
 428 /* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
 429  */
 430 static GLuint radeon_texture_alpha[][RADEON_MAX_TEXTURE_UNITS] =
 431 {
 432    {
 433       RADEON_ALPHA_ARG_A_T0_ALPHA,
 434       RADEON_ALPHA_ARG_A_T1_ALPHA,
 435       RADEON_ALPHA_ARG_A_T2_ALPHA
 436    },
 437    {
 438       RADEON_ALPHA_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
 439       RADEON_ALPHA_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
 440       RADEON_ALPHA_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
 441    },
 442 };
 443
 444 static GLuint radeon_tfactor_alpha[] =
 445 {
 446    RADEON_ALPHA_ARG_A_TFACTOR_ALPHA,
 447    RADEON_ALPHA_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
 448 };
 449
 450 static GLuint radeon_primary_alpha[] =
 451 {
 452    RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA,
 453    RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
 454 };
 455
 456 static GLuint radeon_previous_alpha[] =
 457 {
 458    RADEON_ALPHA_ARG_A_CURRENT_ALPHA,
 459    RADEON_ALPHA_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
 460 };
 461
 462 /* GL_ZERO table - indices 0-1
 463  * GL_ONE  table - indices 1-2
 464  */
 465 static GLuint radeon_zero_alpha[] =
 466 {
 467    RADEON_ALPHA_ARG_A_ZERO,
 468    RADEON_ALPHA_ARG_A_ZERO | RADEON_COMP_ARG_A,
 469    RADEON_ALPHA_ARG_A_ZERO
 470 };
 471
 472
 473 /* Extract the arg from slot A, shift it into the correct argument slot
 474  * and set the corresponding complement bit.
 475  */
 476 #define RADEON_COLOR_ARG( n, arg )                      \
 477 do {                                                    \
 478    color_combine |=                                     \
 479       ((color_arg[n] & RADEON_COLOR_ARG_MASK)           \
 480        << RADEON_COLOR_ARG_##arg##_SHIFT);              \
 481    color_combine |=                                     \
 482       ((color_arg[n] >> RADEON_COMP_ARG_SHIFT)          \
 483        << RADEON_COMP_ARG_##arg##_SHIFT);               \
 484 } while (0)
 485
 486 #define RADEON_ALPHA_ARG( n, arg )                      \
 487 do {                                                    \
 488    alpha_combine |=                                     \
 489       ((alpha_arg[n] & RADEON_ALPHA_ARG_MASK)           \
 490        << RADEON_ALPHA_ARG_##arg##_SHIFT);              \
 491    alpha_combine |=                                     \
 492       ((alpha_arg[n] >> RADEON_COMP_ARG_SHIFT)          \
 493        << RADEON_COMP_ARG_##arg##_SHIFT);               \
 494 } while (0)
 495
 496
 497 /* ================================================================
 498  * Texture unit state management
 499  */
 500
 501 static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 502 {
 503    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 504    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 505    GLuint color_combine, alpha_combine;
 506    const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
 507          | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
 508          | RADEON_SCALE_1X | RADEON_CLAMP_TX;
 509    const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
 510          | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
 511          | RADEON_SCALE_1X | RADEON_CLAMP_TX;
 512
 513
 514    /* texUnit->_Current can be NULL if and only if the texture unit is
 515     * not actually enabled.
 516     */
 517    assert( (texUnit->_ReallyEnabled == 0)
 518            || (texUnit->_Current != NULL) );
 519
 520    if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
 521       fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
 522    }
 523
 524    /* Set the texture environment state.  Isn't this nice and clean?
 525     * The chip will automagically set the texture alpha to 0xff when
 526     * the texture format does not include an alpha component. This
 527     * reduces the amount of special-casing we have to do, alpha-only
 528     * textures being a notable exception. Doesn't work for luminance
 529     * textures realized with I8 and ALPHA_IN_MAP not set neither (on r100).
 530     */
 531     /* Don't cache these results.
 532     */
 533    rmesa->state.texture.unit[unit].format = 0;
 534    rmesa->state.texture.unit[unit].envMode = 0;
 535
 536    if ( !texUnit->_ReallyEnabled ) {
 537       color_combine = color_combine0;
 538       alpha_combine = alpha_combine0;
 539    }
 540    else {
 541       GLuint color_arg[3], alpha_arg[3];
 542       GLuint i;
 543       const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
 544       const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
 545       GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
 546       GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
 547
 548
 549       /* Step 1:
 550        * Extract the color and alpha combine function arguments.
 551        */
 552       for ( i = 0 ; i < numColorArgs ; i++ ) {
 553          const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
 554          const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
 555          assert(op >= 0);
 556          assert(op <= 3);
 557          switch ( srcRGBi ) {
 558          case GL_TEXTURE:
 559             if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_ALPHA)
 560                color_arg[i] = radeon_zero_color[op];
 561             else
 562                color_arg[i] = radeon_texture_color[op][unit];
 563             break;
 564          case GL_CONSTANT:
 565             color_arg[i] = radeon_tfactor_color[op];
 566             break;
 567          case GL_PRIMARY_COLOR:
 568             color_arg[i] = radeon_primary_color[op];
 569             break;
 570          case GL_PREVIOUS:
 571             color_arg[i] = radeon_previous_color[op];
 572             break;
 573          case GL_ZERO:
 574             color_arg[i] = radeon_zero_color[op];
 575             break;
 576          case GL_ONE:
 577             color_arg[i] = radeon_zero_color[op+1];
 578             break;
 579          case GL_TEXTURE0:
 580          case GL_TEXTURE1:
 581          case GL_TEXTURE2: {
 582             GLuint txunit = srcRGBi - GL_TEXTURE0;
 583             if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_ALPHA)
 584                color_arg[i] = radeon_zero_color[op];
 585             else
 586          /* implement ogl 1.4/1.5 core spec here, not specification of
 587           * GL_ARB_texture_env_crossbar (which would require disabling blending
 588           * instead of undefined results when referencing not enabled texunit) */
 589               color_arg[i] = radeon_texture_color[op][txunit];
 590             }
 591             break;
 592          default:
 593             return GL_FALSE;
 594          }
 595       }
 596
 597       for ( i = 0 ; i < numAlphaArgs ; i++ ) {
 598          const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
 599          const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
 600          assert(op >= 0);
 601          assert(op <= 1);
 602          switch ( srcAi ) {
 603          case GL_TEXTURE:
 604             if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_LUMINANCE)
 605                alpha_arg[i] = radeon_zero_alpha[op+1];
 606             else
 607                alpha_arg[i] = radeon_texture_alpha[op][unit];
 608             break;
 609          case GL_CONSTANT:
 610             alpha_arg[i] = radeon_tfactor_alpha[op];
 611             break;
 612          case GL_PRIMARY_COLOR:
 613             alpha_arg[i] = radeon_primary_alpha[op];
 614             break;
 615          case GL_PREVIOUS:
 616             alpha_arg[i] = radeon_previous_alpha[op];
 617             break;
 618          case GL_ZERO:
 619             alpha_arg[i] = radeon_zero_alpha[op];
 620             break;
 621          case GL_ONE:
 622             alpha_arg[i] = radeon_zero_alpha[op+1];
 623             break;
 624          case GL_TEXTURE0:
 625          case GL_TEXTURE1:
 626          case GL_TEXTURE2: {
 627             GLuint txunit = srcAi - GL_TEXTURE0;
 628             if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_LUMINANCE)
 629                alpha_arg[i] = radeon_zero_alpha[op+1];
 630             else
 631                alpha_arg[i] = radeon_texture_alpha[op][txunit];
 632             }
 633             break;
 634          default:
 635             return GL_FALSE;
 636          }
 637       }
 638
 639       /* Step 2:
 640        * Build up the color and alpha combine functions.
 641        */
 642       switch ( texUnit->_CurrentCombine->ModeRGB ) {
 643       case GL_REPLACE:
 644          color_combine = (RADEON_COLOR_ARG_A_ZERO |
 645                           RADEON_COLOR_ARG_B_ZERO |
 646                           RADEON_BLEND_CTL_ADD |
 647                           RADEON_CLAMP_TX);
 648          RADEON_COLOR_ARG( 0, C );
 649          break;
 650       case GL_MODULATE:
 651          color_combine = (RADEON_COLOR_ARG_C_ZERO |
 652                           RADEON_BLEND_CTL_ADD |
 653                           RADEON_CLAMP_TX);
 654          RADEON_COLOR_ARG( 0, A );
 655          RADEON_COLOR_ARG( 1, B );
 656          break;
 657       case GL_ADD:
 658          color_combine = (RADEON_COLOR_ARG_B_ZERO |
 659                           RADEON_COMP_ARG_B |
 660                           RADEON_BLEND_CTL_ADD |
 661                           RADEON_CLAMP_TX);
 662          RADEON_COLOR_ARG( 0, A );
 663          RADEON_COLOR_ARG( 1, C );
 664          break;
 665       case GL_ADD_SIGNED:
 666          color_combine = (RADEON_COLOR_ARG_B_ZERO |
 667                           RADEON_COMP_ARG_B |
 668                           RADEON_BLEND_CTL_ADDSIGNED |
 669                           RADEON_CLAMP_TX);
 670          RADEON_COLOR_ARG( 0, A );
 671          RADEON_COLOR_ARG( 1, C );
 672          break;
 673       case GL_SUBTRACT:
 674          color_combine = (RADEON_COLOR_ARG_B_ZERO |
 675                           RADEON_COMP_ARG_B |
 676                           RADEON_BLEND_CTL_SUBTRACT |
 677                           RADEON_CLAMP_TX);
 678          RADEON_COLOR_ARG( 0, A );
 679          RADEON_COLOR_ARG( 1, C );
 680          break;
 681       case GL_INTERPOLATE:
 682          color_combine = (RADEON_BLEND_CTL_BLEND |
 683                           RADEON_CLAMP_TX);
 684          RADEON_COLOR_ARG( 0, B );
 685          RADEON_COLOR_ARG( 1, A );
 686          RADEON_COLOR_ARG( 2, C );
 687          break;
 688
 689       case GL_DOT3_RGB_EXT:
 690       case GL_DOT3_RGBA_EXT:
 691          /* The EXT version of the DOT3 extension does not support the
 692           * scale factor, but the ARB version (and the version in OpenGL
 693           * 1.3) does.
 694           */
 695          RGBshift = 0;
 696          /* FALLTHROUGH */
 697
 698       case GL_DOT3_RGB:
 699       case GL_DOT3_RGBA:
 700          /* The R100 / RV200 only support a 1X multiplier in hardware
 701           * w/the ARB version.
 702           */
 703          if ( RGBshift != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) ) {
 704             return GL_FALSE;
 705          }
 706
 707          RGBshift += 2;
 708          if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
 709             || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
 710             /* is it necessary to set this or will it be ignored anyway? */
 711             Ashift = RGBshift;
 712          }
 713
 714          color_combine = (RADEON_COLOR_ARG_C_ZERO |
 715                           RADEON_BLEND_CTL_DOT3 |
 716                           RADEON_CLAMP_TX);
 717          RADEON_COLOR_ARG( 0, A );
 718          RADEON_COLOR_ARG( 1, B );
 719          break;
 720
 721       case GL_MODULATE_ADD_ATI:
 722          color_combine = (RADEON_BLEND_CTL_ADD |
 723                           RADEON_CLAMP_TX);
 724          RADEON_COLOR_ARG( 0, A );
 725          RADEON_COLOR_ARG( 1, C );
 726          RADEON_COLOR_ARG( 2, B );
 727          break;
 728       case GL_MODULATE_SIGNED_ADD_ATI:
 729          color_combine = (RADEON_BLEND_CTL_ADDSIGNED |
 730                           RADEON_CLAMP_TX);
 731          RADEON_COLOR_ARG( 0, A );
 732          RADEON_COLOR_ARG( 1, C );
 733          RADEON_COLOR_ARG( 2, B );
 734          break;
 735       case GL_MODULATE_SUBTRACT_ATI:
 736          color_combine = (RADEON_BLEND_CTL_SUBTRACT |
 737                           RADEON_CLAMP_TX);
 738          RADEON_COLOR_ARG( 0, A );
 739          RADEON_COLOR_ARG( 1, C );
 740          RADEON_COLOR_ARG( 2, B );
 741          break;
 742       default:
 743          return GL_FALSE;
 744       }
 745
 746       switch ( texUnit->_CurrentCombine->ModeA ) {
 747       case GL_REPLACE:
 748          alpha_combine = (RADEON_ALPHA_ARG_A_ZERO |
 749                           RADEON_ALPHA_ARG_B_ZERO |
 750                           RADEON_BLEND_CTL_ADD |
 751                           RADEON_CLAMP_TX);
 752          RADEON_ALPHA_ARG( 0, C );
 753          break;
 754       case GL_MODULATE:
 755          alpha_combine = (RADEON_ALPHA_ARG_C_ZERO |
 756                           RADEON_BLEND_CTL_ADD |
 757                           RADEON_CLAMP_TX);
 758          RADEON_ALPHA_ARG( 0, A );
 759          RADEON_ALPHA_ARG( 1, B );
 760          break;
 761       case GL_ADD:
 762          alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
 763                           RADEON_COMP_ARG_B |
 764                           RADEON_BLEND_CTL_ADD |
 765                           RADEON_CLAMP_TX);
 766          RADEON_ALPHA_ARG( 0, A );
 767          RADEON_ALPHA_ARG( 1, C );
 768          break;
 769       case GL_ADD_SIGNED:
 770          alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
 771                           RADEON_COMP_ARG_B |
 772                           RADEON_BLEND_CTL_ADDSIGNED |
 773                           RADEON_CLAMP_TX);
 774          RADEON_ALPHA_ARG( 0, A );
 775          RADEON_ALPHA_ARG( 1, C );
 776          break;
 777       case GL_SUBTRACT:
 778          alpha_combine = (RADEON_COLOR_ARG_B_ZERO |
 779                           RADEON_COMP_ARG_B |
 780                           RADEON_BLEND_CTL_SUBTRACT |
 781                           RADEON_CLAMP_TX);
 782          RADEON_ALPHA_ARG( 0, A );
 783          RADEON_ALPHA_ARG( 1, C );
 784          break;
 785       case GL_INTERPOLATE:
 786          alpha_combine = (RADEON_BLEND_CTL_BLEND |
 787                           RADEON_CLAMP_TX);
 788          RADEON_ALPHA_ARG( 0, B );
 789          RADEON_ALPHA_ARG( 1, A );
 790          RADEON_ALPHA_ARG( 2, C );
 791          break;
 792
 793       case GL_MODULATE_ADD_ATI:
 794          alpha_combine = (RADEON_BLEND_CTL_ADD |
 795                           RADEON_CLAMP_TX);
 796          RADEON_ALPHA_ARG( 0, A );
 797          RADEON_ALPHA_ARG( 1, C );
 798          RADEON_ALPHA_ARG( 2, B );
 799          break;
 800       case GL_MODULATE_SIGNED_ADD_ATI:
 801          alpha_combine = (RADEON_BLEND_CTL_ADDSIGNED |
 802                           RADEON_CLAMP_TX);
 803          RADEON_ALPHA_ARG( 0, A );
 804          RADEON_ALPHA_ARG( 1, C );
 805          RADEON_ALPHA_ARG( 2, B );
 806          break;
 807       case GL_MODULATE_SUBTRACT_ATI:
 808          alpha_combine = (RADEON_BLEND_CTL_SUBTRACT |
 809                           RADEON_CLAMP_TX);
 810          RADEON_ALPHA_ARG( 0, A );
 811          RADEON_ALPHA_ARG( 1, C );
 812          RADEON_ALPHA_ARG( 2, B );
 813          break;
 814       default:
 815          return GL_FALSE;
 816       }
 817
 818       if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB_EXT)
 819            || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB) ) {
 820          alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE;
 821       }
 822
 823       /* Step 3:
 824        * Apply the scale factor.
 825        */
 826       color_combine |= (RGBshift << RADEON_SCALE_SHIFT);
 827       alpha_combine |= (Ashift   << RADEON_SCALE_SHIFT);
 828
 829       /* All done!
 830        */
 831    }
 832
 833    if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
 834         rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
 835       RADEON_STATECHANGE( rmesa, tex[unit] );
 836       rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
 837       rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
 838    }
 839
 840    return GL_TRUE;
 841 }
 842
 843 #define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |       \
 844                               RADEON_MIN_FILTER_MASK |          \
 845                               RADEON_MAG_FILTER_MASK |          \
 846                               RADEON_MAX_ANISO_MASK |           \
 847                               RADEON_YUV_TO_RGB |               \
 848                               RADEON_YUV_TEMPERATURE_MASK |     \
 849                               RADEON_CLAMP_S_MASK |             \
 850                               RADEON_CLAMP_T_MASK |             \
 851                               RADEON_BORDER_MODE_D3D )
 852
 853 #define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK |      \
 854                               RADEON_TXFORMAT_HEIGHT_MASK |     \
 855                               RADEON_TXFORMAT_FORMAT_MASK |     \
 856                               RADEON_TXFORMAT_F5_WIDTH_MASK |   \
 857                               RADEON_TXFORMAT_F5_HEIGHT_MASK |  \
 858                               RADEON_TXFORMAT_ALPHA_IN_MAP |    \
 859                               RADEON_TXFORMAT_CUBIC_MAP_ENABLE |        \
 860                               RADEON_TXFORMAT_NON_POWER2)
 861
 862
 863 static void import_tex_obj_state( radeonContextPtr rmesa,
 864                                   int unit,
 865                                   radeonTexObjPtr texobj )
 866 {
 867 /* do not use RADEON_DB_STATE to avoid stale texture caches */
 868    int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
 869    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 870
 871    RADEON_STATECHANGE( rmesa, tex[unit] );
 872
 873    cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
 874    cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
 875    cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
 876    cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
 877    cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
 878    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
 879
 880    if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
 881       GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
 882       txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
 883       txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
 884       RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] );
 885       se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit;
 886    }
 887    else {
 888       se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
 889
 890       if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
 891          int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
 892          GLuint bytesPerFace = texobj->base.totalSize / 6;
 893          ASSERT(texobj->base.totalSize % 6 == 0);
 894
 895          RADEON_STATECHANGE( rmesa, cube[unit] );
 896          cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
 897          /* dont know if this setup conforms to OpenGL..
 898           * at least it matches the behavior of mesa software renderer
 899           */
 900          cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
 901          cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
 902          cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
 903          cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
 904          cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
 905          cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
 906       }
 907    }
 908
 909    if (se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT]) {
 910       RADEON_STATECHANGE( rmesa, set );
 911       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
 912    }
 913
 914    texobj->dirty_state &= ~(1<<unit);
 915 }
 916
 917
 918
 919
 920 static void set_texgen_matrix( radeonContextPtr rmesa,
 921                                GLuint unit,
 922                                const GLfloat *s_plane,
 923                                const GLfloat *t_plane,
 924                                const GLfloat *r_plane,
 925                                const GLfloat *q_plane )
 926 {
 927    rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
 928    rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
 929    rmesa->TexGenMatrix[unit].m[8]  = s_plane[2];
 930    rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
 931
 932    rmesa->TexGenMatrix[unit].m[1]  = t_plane[0];
 933    rmesa->TexGenMatrix[unit].m[5]  = t_plane[1];
 934    rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
 935    rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
 936
 937    rmesa->TexGenMatrix[unit].m[2]  = r_plane[0];
 938    rmesa->TexGenMatrix[unit].m[6]  = r_plane[1];
 939    rmesa->TexGenMatrix[unit].m[10] = r_plane[2];
 940    rmesa->TexGenMatrix[unit].m[14] = r_plane[3];
 941
 942    rmesa->TexGenMatrix[unit].m[3]  = q_plane[0];
 943    rmesa->TexGenMatrix[unit].m[7]  = q_plane[1];
 944    rmesa->TexGenMatrix[unit].m[11] = q_plane[2];
 945    rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
 946
 947    rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
 948    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
 949 }
 950
 951 /* Returns GL_FALSE if fallback required.
 952  */
 953 static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
 954 {
 955    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 956    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 957    GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
 958    GLuint tmp = rmesa->TexGenEnabled;
 959    static const GLfloat reflect[16] = {
 960       -1,  0,  0,  0,
 961        0, -1,  0,  0,
 962        0,  0,  -1, 0,
 963        0,  0,  0,  1 };
 964
 965    rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE << unit);
 966    rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE << unit);
 967    rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK << inputshift);
 968    rmesa->TexGenNeedNormals[unit] = 0;
 969
 970    if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT|Q_BIT)) == 0) {
 971       /* Disabled, no fallback:
 972        */
 973       rmesa->TexGenEnabled |=
 974          (RADEON_TEXGEN_INPUT_TEXCOORD_0 + unit) << inputshift;
 975       return GL_TRUE;
 976    }
 977    /* the r100 cannot do texgen for some coords and not for others
 978     * we do not detect such cases (certainly can't do it here) and just
 979     * ASSUME that when S and T are texgen enabled we do not need other
 980     * non-texgen enabled coords, no matter if the R and Q bits are texgen
 981     * enabled. Still check for mixed mode texgen for all coords.
 982     */
 983    else if ( (texUnit->TexGenEnabled & S_BIT) &&
 984              (texUnit->TexGenEnabled & T_BIT) &&
 985              (texUnit->GenModeS == texUnit->GenModeT) ) {
 986       if ( ((texUnit->TexGenEnabled & R_BIT) &&
 987             (texUnit->GenModeS != texUnit->GenModeR)) ||
 988            ((texUnit->TexGenEnabled & Q_BIT) &&
 989             (texUnit->GenModeS != texUnit->GenModeQ)) ) {
 990          /* Mixed modes, fallback:
 991           */
 992          if (RADEON_DEBUG & DEBUG_FALLBACKS)
 993             fprintf(stderr, "fallback mixed texgen\n");
 994          return GL_FALSE;
 995       }
 996       rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
 997    }
 998    else {
 999    /* some texgen mode not including both S and T bits */
1000       if (RADEON_DEBUG & DEBUG_FALLBACKS)
1001          fprintf(stderr, "fallback mixed texgen/nontexgen\n");
1002       return GL_FALSE;
1003    }
1004
1005    if ((texUnit->TexGenEnabled & (R_BIT | Q_BIT)) != 0) {
1006       /* need this here for vtxfmt presumably. Argh we need to set
1007          this from way too many places, would be much easier if we could leave
1008          tcl q coord always enabled as on r200) */
1009       RADEON_STATECHANGE( rmesa, tcl );
1010       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit);
1011    }
1012
1013    switch (texUnit->GenModeS) {
1014    case GL_OBJECT_LINEAR:
1015       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
1016       set_texgen_matrix( rmesa, unit,
1017                          texUnit->ObjectPlaneS,
1018                          texUnit->ObjectPlaneT,
1019                          texUnit->ObjectPlaneR,
1020                          texUnit->ObjectPlaneQ);
1021       break;
1022
1023    case GL_EYE_LINEAR:
1024       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
1025       set_texgen_matrix( rmesa, unit,
1026                          texUnit->EyePlaneS,
1027                          texUnit->EyePlaneT,
1028                          texUnit->EyePlaneR,
1029                          texUnit->EyePlaneQ);
1030       break;
1031
1032    case GL_REFLECTION_MAP_NV:
1033       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1034       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT << inputshift;
1035       /* TODO: unknown if this is needed/correct */
1036       set_texgen_matrix( rmesa, unit, reflect, reflect + 4,
1037                         reflect + 8, reflect + 12 );
1038       break;
1039
1040    case GL_NORMAL_MAP_NV:
1041       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1042       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL << inputshift;
1043       break;
1044
1045    case GL_SPHERE_MAP:
1046       /* the mode which everyone uses :-( */
1047    default:
1048       /* Unsupported mode, fallback:
1049        */
1050       if (RADEON_DEBUG & DEBUG_FALLBACKS)
1051          fprintf(stderr, "fallback GL_SPHERE_MAP\n");
1052       return GL_FALSE;
1053    }
1054
1055    if (tmp != rmesa->TexGenEnabled) {
1056       rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1057    }
1058
1059    return GL_TRUE;
1060 }
1061
1062
1063 static void disable_tex( GLcontext *ctx, int unit )
1064 {
1065    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1066
1067    if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
1068       /* Texture unit disabled */
1069       if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
1070          /* The old texture is no longer bound to this texture unit.
1071           * Mark it as such.
1072           */
1073
1074          rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
1075          rmesa->state.texture.unit[unit].texobj = NULL;
1076       }
1077
1078       RADEON_STATECHANGE( rmesa, ctx );
1079       rmesa->hw.ctx.cmd[CTX_PP_CNTL] &=
1080           ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
1081
1082       RADEON_STATECHANGE( rmesa, tcl );
1083       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
1084                                                 RADEON_Q_BIT(unit));
1085
1086       if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
1087          TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
1088          rmesa->recheck_texgen[unit] = GL_TRUE;
1089       }
1090
1091       if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
1092       /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
1093          cubic_map bit on unit 2 when the unit is disabled, otherwise every
1094          2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
1095          units, better be safe than sorry though).*/
1096          RADEON_STATECHANGE( rmesa, tex[unit] );
1097          rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
1098       }
1099
1100       {
1101          GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
1102          GLuint tmp = rmesa->TexGenEnabled;
1103
1104          rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
1105          rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
1106          rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
1107          rmesa->TexGenNeedNormals[unit] = 0;
1108          rmesa->TexGenEnabled |=
1109              (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
1110
1111          if (tmp != rmesa->TexGenEnabled) {
1112             rmesa->recheck_texgen[unit] = GL_TRUE;
1113             rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1114          }
1115       }
1116    }
1117 }
1118
1119 static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
1120 {
1121    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1122    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1123    struct gl_texture_object *tObj = texUnit->_Current;
1124    radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1125
1126    /* Need to load the 2d images associated with this unit.
1127     */
1128    if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
1129       t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
1130       t->base.dirty_images[0] = ~0;
1131    }
1132
1133    ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
1134
1135    if ( t->base.dirty_images[0] ) {
1136       RADEON_FIREVERTICES( rmesa );
1137       radeonSetTexImages( rmesa, tObj );
1138       radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
1139       if ( !t->base.memBlock )
1140         return GL_FALSE;
1141    }
1142
1143    return GL_TRUE;
1144 }
1145
1146 static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
1147 {
1148    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1149    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1150    struct gl_texture_object *tObj = texUnit->_Current;
1151    radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1152    GLuint face;
1153
1154    /* Need to load the 2d images associated with this unit.
1155     */
1156    if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
1157       t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
1158       for (face = 0; face < 6; face++)
1159          t->base.dirty_images[face] = ~0;
1160    }
1161
1162    ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
1163
1164    if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
1165         t->base.dirty_images[2] || t->base.dirty_images[3] ||
1166         t->base.dirty_images[4] || t->base.dirty_images[5] ) {
1167       /* flush */
1168       RADEON_FIREVERTICES( rmesa );
1169       /* layout memory space, once for all faces */
1170       radeonSetTexImages( rmesa, tObj );
1171    }
1172
1173    /* upload (per face) */
1174    for (face = 0; face < 6; face++) {
1175       if (t->base.dirty_images[face]) {
1176          radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
1177       }
1178    }
1179
1180    if ( !t->base.memBlock ) {
1181       /* texmem alloc failed, use s/w fallback */
1182       return GL_FALSE;
1183    }
1184
1185    return GL_TRUE;
1186 }
1187
1188 static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
1189 {
1190    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1191    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1192    struct gl_texture_object *tObj = texUnit->_Current;
1193    radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1194
1195    if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
1196       t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
1197       t->base.dirty_images[0] = ~0;
1198    }
1199
1200    ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
1201
1202    if ( t->base.dirty_images[0] ) {
1203       RADEON_FIREVERTICES( rmesa );
1204       radeonSetTexImages( rmesa, tObj );
1205       radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
1206       if ( !t->base.memBlock /* && !rmesa->prefer_gart_client_texturing  FIXME */ ) {
1207          fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
1208          return GL_FALSE;
1209       }
1210    }
1211
1212    return GL_TRUE;
1213 }
1214
1215
1216 static GLboolean update_tex_common( GLcontext *ctx, int unit )
1217 {
1218    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1219    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1220    struct gl_texture_object *tObj = texUnit->_Current;
1221    radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
1222    GLenum format;
1223
1224    /* Fallback if there's a texture border */
1225    if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
1226       fprintf(stderr, "%s: border\n", __FUNCTION__);
1227       return GL_FALSE;
1228    }
1229    /* yuv conversion only works in first unit */
1230    if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
1231       return GL_FALSE;
1232
1233    /* Update state if this is a different texture object to last
1234     * time.
1235     */
1236    if ( rmesa->state.texture.unit[unit].texobj != t ) {
1237       if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
1238          /* The old texture is no longer bound to this texture unit.
1239           * Mark it as such.
1240           */
1241
1242          rmesa->state.texture.unit[unit].texobj->base.bound &=
1243              ~(1UL << unit);
1244       }
1245
1246       rmesa->state.texture.unit[unit].texobj = t;
1247       t->base.bound |= (1UL << unit);
1248       t->dirty_state |= 1<<unit;
1249       driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
1250    }
1251
1252
1253    /* Newly enabled?
1254     */
1255    if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
1256       RADEON_STATECHANGE( rmesa, ctx );
1257       rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
1258           (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
1259
1260       RADEON_STATECHANGE( rmesa, tcl );
1261
1262       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
1263
1264       rmesa->recheck_texgen[unit] = GL_TRUE;
1265    }
1266
1267    if (t->dirty_state & (1<<unit)) {
1268       import_tex_obj_state( rmesa, unit, t );
1269       /* may need to update texture matrix (for texrect adjustments) */
1270       rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1271    }
1272
1273    if (rmesa->recheck_texgen[unit]) {
1274       GLboolean fallback = !radeon_validate_texgen( ctx, unit );
1275       TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
1276       rmesa->recheck_texgen[unit] = 0;
1277       rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1278    }
1279
1280    format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
1281    if ( rmesa->state.texture.unit[unit].format != format ||
1282         rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
1283       rmesa->state.texture.unit[unit].format = format;
1284       rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
1285       if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
1286          return GL_FALSE;
1287       }
1288    }
1289
1290    FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
1291    return !t->border_fallback;
1292 }
1293
1294
1295
1296 static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
1297 {
1298    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1299
1300    if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
1301       return (enable_tex_rect( ctx, unit ) &&
1302               update_tex_common( ctx, unit ));
1303    }
1304    else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
1305       return (enable_tex_2d( ctx, unit ) &&
1306               update_tex_common( ctx, unit ));
1307    }
1308    else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
1309       return (enable_tex_cube( ctx, unit ) &&
1310               update_tex_common( ctx, unit ));
1311    }
1312    else if ( texUnit->_ReallyEnabled ) {
1313       return GL_FALSE;
1314    }
1315    else {
1316       disable_tex( ctx, unit );
1317       return GL_TRUE;
1318    }
1319 }
1320
1321 void radeonUpdateTextureState( GLcontext *ctx )
1322 {
1323    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
1324    GLboolean ok;
1325
1326    ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
1327          radeonUpdateTextureUnit( ctx, 1 ) &&
1328          radeonUpdateTextureUnit( ctx, 2 ));
1329
1330    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
1331
1332    if (rmesa->TclFallback)
1333       radeonChooseVertexState( ctx );
1334 }