src/mesa/drivers/dri/radeon/radeon_tcl.c

   1 /* $XFree86$ */
   2 /**************************************************************************
   3
   4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   5                      Tungsten Graphics Inc., Austin, Texas.
   6
   7 All Rights Reserved.
   8
   9 Permission is hereby granted, free of charge, to any person obtaining
  10 a copy of this software and associated documentation files (the
  11 "Software"), to deal in the Software without restriction, including
  12 without limitation the rights to use, copy, modify, merge, publish,
  13 distribute, sublicense, and/or sell copies of the Software, and to
  14 permit persons to whom the Software is furnished to do so, subject to
  15 the following conditions:
  16
  17 The above copyright notice and this permission notice (including the
  18 next paragraph) shall be included in all copies or substantial
  19 portions of the Software.
  20
  21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28
  29 **************************************************************************/
  30
  31 /*
  32  * Authors:
  33  *   Keith Whitwell <keith@tungstengraphics.com>
  34  */
  35
  36 #include "glheader.h"
  37 #include "imports.h"
  38 #include "light.h"
  39 #include "mtypes.h"
  40 #include "enums.h"
  41
  42 #include "array_cache/acache.h"
  43 #include "tnl/tnl.h"
  44 #include "tnl/t_pipeline.h"
  45
  46 #include "radeon_context.h"
  47 #include "radeon_state.h"
  48 #include "radeon_ioctl.h"
  49 #include "radeon_tex.h"
  50 #include "radeon_tcl.h"
  51 #include "radeon_swtcl.h"
  52 #include "radeon_maos.h"
  53
  54
  55
  56 /*
  57  * Render unclipped vertex buffers by emitting vertices directly to
  58  * dma buffers.  Use strip/fan hardware primitives where possible.
  59  * Try to simulate missing primitives with indexed vertices.
  60  */
  61 #define HAVE_POINTS      1
  62 #define HAVE_LINES       1
  63 #define HAVE_LINE_LOOP   0
  64 #define HAVE_LINE_STRIPS 1
  65 #define HAVE_TRIANGLES   1
  66 #define HAVE_TRI_STRIPS  1
  67 #define HAVE_TRI_STRIP_1 0
  68 #define HAVE_TRI_FANS    1
  69 #define HAVE_QUADS       0
  70 #define HAVE_QUAD_STRIPS 0
  71 #define HAVE_POLYGONS    1
  72 #define HAVE_ELTS        1
  73
  74
  75 #define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
  76 #define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
  77 #define HW_LINE_LOOP        0
  78 #define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
  79 #define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
  80 #define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
  81 #define HW_TRIANGLE_STRIP_1 0
  82 #define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  83 #define HW_QUADS            0
  84 #define HW_QUAD_STRIP       0
  85 #define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  86
  87
  88 static GLboolean discrete_prim[0x10] = {
  89    0,                           /* 0 none */
  90    1,                           /* 1 points */
  91    1,                           /* 2 lines */
  92    0,                           /* 3 line_strip */
  93    1,                           /* 4 tri_list */
  94    0,                           /* 5 tri_fan */
  95    0,                           /* 6 tri_type2 */
  96    1,                           /* 7 rect list (unused) */
  97    1,                           /* 8 3vert point */
  98    1,                           /* 9 3vert line */
  99    0,
 100    0,
 101    0,
 102    0,
 103    0,
 104    0,
 105 };
 106
 107
 108 #define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
 109 #define ELT_TYPE  GLushort
 110
 111 #define ELT_INIT(prim, hw_prim) \
 112    radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
 113
 114 #define GET_MESA_ELTS() rmesa->tcl.Elts
 115
 116
 117 /* Don't really know how many elts will fit in what's left of cmdbuf,
 118  * as there is state to emit, etc:
 119  */
 120
 121 /* Testing on isosurf shows a maximum around here.  Don't know if it's
 122  * the card or driver or kernel module that is causing the behaviour.
 123  */
 124 #define GET_MAX_HW_ELTS() 300
 125
 126
 127 #define RESET_STIPPLE() do {                    \
 128    RADEON_STATECHANGE( rmesa, lin );            \
 129    radeonEmitState( rmesa );                    \
 130 } while (0)
 131
 132 #define AUTO_STIPPLE( mode )  do {              \
 133    RADEON_STATECHANGE( rmesa, lin );            \
 134    if (mode)                                    \
 135       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
 136          RADEON_LINE_PATTERN_AUTO_RESET;        \
 137    else                                         \
 138       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
 139          ~RADEON_LINE_PATTERN_AUTO_RESET;       \
 140    radeonEmitState( rmesa );                    \
 141 } while (0)
 142
 143
 144
 145 #define ALLOC_ELTS(nr)  radeonAllocElts( rmesa, nr )
 146
 147 static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
 148 {
 149    if (rmesa->dma.flush)
 150       rmesa->dma.flush( rmesa );
 151
 152    radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
 153                            rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
 154
 155    radeonEmitAOS( rmesa,
 156                 rmesa->tcl.aos_components,
 157                 rmesa->tcl.nr_aos_components, 0 );
 158
 159    return radeonAllocEltsOpenEnded( rmesa,
 160                                     rmesa->tcl.vertex_format,
 161                                     rmesa->tcl.hw_primitive, nr );
 162 }
 163
 164 #define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
 165
 166
 167
 168 /* TODO: Try to extend existing primitive if both are identical,
 169  * discrete and there are no intervening state changes.  (Somewhat
 170  * duplicates changes to DrawArrays code)
 171  */
 172 static void radeonEmitPrim( GLcontext *ctx,
 173                        GLenum prim,
 174                        GLuint hwprim,
 175                        GLuint start,
 176                        GLuint count)
 177 {
 178    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 179    radeonTclPrimitive( ctx, prim, hwprim );
 180
 181    radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
 182                             rmesa->hw.max_state_size + VBUF_BUFSZ );
 183
 184    radeonEmitAOS( rmesa,
 185                   rmesa->tcl.aos_components,
 186                   rmesa->tcl.nr_aos_components,
 187                   start );
 188
 189    /* Why couldn't this packet have taken an offset param?
 190     */
 191    radeonEmitVbufPrim( rmesa,
 192                        rmesa->tcl.vertex_format,
 193                        rmesa->tcl.hw_primitive,
 194                        count - start );
 195 }
 196
 197 #define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
 198    radeonEmitPrim( ctx, prim, hwprim, start, count );           \
 199    (void) rmesa; } while (0)
 200
 201 /* Try & join small primitives
 202  */
 203 #if 0
 204 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
 205 #else
 206 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )                    \
 207   ((NR) < 20 ||                                                 \
 208    ((NR) < 40 &&                                                \
 209     rmesa->tcl.hw_primitive == (PRIM|                           \
 210                             RADEON_CP_VC_CNTL_PRIM_WALK_IND|    \
 211                             RADEON_CP_VC_CNTL_TCL_ENABLE)))
 212 #endif
 213
 214 #ifdef MESA_BIG_ENDIAN
 215 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
 216 #define EMIT_ELT(dest, offset, x) do {                          \
 217         int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 );     \
 218         GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 );    \
 219         (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x);     \
 220         (void)rmesa; } while (0)
 221 #else
 222 #define EMIT_ELT(dest, offset, x) do {                          \
 223         (dest)[offset] = (GLushort) (x);                        \
 224         (void)rmesa; } while (0)
 225 #endif
 226
 227 #define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
 228
 229
 230
 231 #define TAG(x) tcl_##x
 232 #include "tnl_dd/t_dd_dmatmp2.h"
 233
 234 /**********************************************************************/
 235 /*                          External entrypoints                     */
 236 /**********************************************************************/
 237
 238 void radeonEmitPrimitive( GLcontext *ctx,
 239                           GLuint first,
 240                           GLuint last,
 241                           GLuint flags )
 242 {
 243    tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 244 }
 245
 246 void radeonEmitEltPrimitive( GLcontext *ctx,
 247                              GLuint first,
 248                              GLuint last,
 249                              GLuint flags )
 250 {
 251    tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 252 }
 253
 254 void radeonTclPrimitive( GLcontext *ctx,
 255                          GLenum prim,
 256                          int hw_prim )
 257 {
 258    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 259    GLuint se_cntl;
 260    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
 261
 262    if (newprim != rmesa->tcl.hw_primitive ||
 263        !discrete_prim[hw_prim&0xf]) {
 264       RADEON_NEWPRIM( rmesa );
 265       rmesa->tcl.hw_primitive = newprim;
 266    }
 267
 268    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 269    se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
 270
 271    if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE))
 272       se_cntl |= RADEON_FLAT_SHADE_VTX_0;
 273    else
 274       se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 275
 276    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 277       RADEON_STATECHANGE( rmesa, set );
 278       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 279    }
 280 }
 281
 282 /**********************************************************************/
 283 /*             Fog blend factor computation for hw tcl                */
 284 /*             same calculation used as in t_vb_fog.c                 */
 285 /**********************************************************************/
 286
 287 #define FOG_EXP_TABLE_SIZE 256
 288 #define FOG_MAX (10.0)
 289 #define EXP_FOG_MAX .0006595
 290 #define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
 291 static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
 292
 293 #if 1
 294 #define NEG_EXP( result, narg )                                         \
 295 do {                                                                    \
 296    GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR));                       \
 297    GLint k = (GLint) f;                                                 \
 298    if (k > FOG_EXP_TABLE_SIZE-2)                                        \
 299       result = (GLfloat) EXP_FOG_MAX;                                   \
 300    else                                                                 \
 301       result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]);      \
 302 } while (0)
 303 #else
 304 #define NEG_EXP( result, narg )                                 \
 305 do {                                                            \
 306    result = exp(-narg);                                         \
 307 } while (0)
 308 #endif
 309
 310
 311 /**
 312  * Initialize the exp_table[] lookup table for approximating exp().
 313  */
 314 void
 315 radeonInitStaticFogData( void )
 316 {
 317    GLfloat f = 0.0F;
 318    GLint i = 0;
 319    for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) {
 320       exp_table[i] = (GLfloat) exp(-f);
 321    }
 322 }
 323
 324
 325 /**
 326  * Compute per-vertex fog blend factors from fog coordinates by
 327  * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
 328  * Fog coordinates are distances from the eye (typically between the
 329  * near and far clip plane distances).
 330  * Note the fog (eye Z) coords may be negative so we use ABS(z) below.
 331  * Fog blend factors are in the range [0,1].
 332  */
 333 float
 334 radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
 335 {
 336    GLfloat end  = ctx->Fog.End;
 337    GLfloat d, temp;
 338    const GLfloat z = FABSF(fogcoord);
 339
 340    switch (ctx->Fog.Mode) {
 341    case GL_LINEAR:
 342       if (ctx->Fog.Start == ctx->Fog.End)
 343          d = 1.0F;
 344       else
 345          d = 1.0F / (ctx->Fog.End - ctx->Fog.Start);
 346       temp = (end - z) * d;
 347       return CLAMP(temp, 0.0F, 1.0F);
 348       break;
 349    case GL_EXP:
 350       d = ctx->Fog.Density;
 351       NEG_EXP( temp, d * z );
 352       return temp;
 353       break;
 354    case GL_EXP2:
 355       d = ctx->Fog.Density*ctx->Fog.Density;
 356       NEG_EXP( temp, d * z * z );
 357       return temp;
 358       break;
 359    default:
 360       _mesa_problem(ctx, "Bad fog mode in make_fog_coord");
 361       return 0;
 362    }
 363 }
 364
 365 /**********************************************************************/
 366 /*                          Render pipeline stage                     */
 367 /**********************************************************************/
 368
 369
 370 /* TCL render.
 371  */
 372 static GLboolean radeon_run_tcl_render( GLcontext *ctx,
 373                                         struct tnl_pipeline_stage *stage )
 374 {
 375    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 376    TNLcontext *tnl = TNL_CONTEXT(ctx);
 377    struct vertex_buffer *VB = &tnl->vb;
 378    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 379    GLuint i;
 380
 381    /* TODO: separate this from the swtnl pipeline
 382     */
 383    if (rmesa->TclFallback)
 384       return GL_TRUE;   /* fallback to software t&l */
 385
 386    if (VB->Count == 0)
 387       return GL_FALSE;
 388
 389    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
 390     * inputs.
 391     */
 392    if (ctx->Light.Enabled) {
 393       inputs |= VERT_BIT_NORMAL;
 394       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
 395          inputs |= VERT_BIT_COLOR1;
 396       }
 397    }
 398
 399    if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
 400       inputs |= VERT_BIT_FOG;
 401    }
 402
 403    for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
 404       if (ctx->Texture.Unit[i]._ReallyEnabled) {
 405       /* TODO: probably should not emit texture coords when texgen is enabled */
 406          if (rmesa->TexGenNeedNormals[i]) {
 407             inputs |= VERT_BIT_NORMAL;
 408          }
 409          inputs |= VERT_BIT_TEX(i);
 410       }
 411    }
 412
 413    radeonReleaseArrays( ctx, ~0 );
 414    radeonEmitArrays( ctx, inputs );
 415
 416    rmesa->tcl.Elts = VB->Elts;
 417
 418    for (i = 0 ; i < VB->PrimitiveCount ; i++)
 419    {
 420       GLuint prim = VB->Primitive[i].mode;
 421       GLuint start = VB->Primitive[i].start;
 422       GLuint length = VB->Primitive[i].count;
 423
 424       if (!length)
 425          continue;
 426
 427       if (rmesa->tcl.Elts)
 428          radeonEmitEltPrimitive( ctx, start, start+length, prim );
 429       else
 430          radeonEmitPrimitive( ctx, start, start+length, prim );
 431    }
 432
 433    return GL_FALSE;             /* finished the pipe */
 434 }
 435
 436
 437
 438 /* Initial state for tcl stage.
 439  */
 440 const struct tnl_pipeline_stage _radeon_tcl_stage =
 441 {
 442    "radeon render",
 443    NULL,
 444    NULL,
 445    NULL,
 446    NULL,
 447    radeon_run_tcl_render        /* run */
 448 };
 449
 450
 451
 452 /**********************************************************************/
 453 /*                 Validate state at pipeline start                   */
 454 /**********************************************************************/
 455
 456
 457 /*-----------------------------------------------------------------------
 458  * Manage TCL fallbacks
 459  */
 460
 461
 462 static void transition_to_swtnl( GLcontext *ctx )
 463 {
 464    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 465    TNLcontext *tnl = TNL_CONTEXT(ctx);
 466    GLuint se_cntl;
 467
 468    RADEON_NEWPRIM( rmesa );
 469    rmesa->swtcl.vertex_format = 0;
 470
 471    radeonChooseVertexState( ctx );
 472    radeonChooseRenderState( ctx );
 473
 474    _mesa_validate_all_lighting_tables( ctx );
 475
 476    tnl->Driver.NotifyMaterialChange =
 477       _mesa_validate_all_lighting_tables;
 478
 479    radeonReleaseArrays( ctx, ~0 );
 480
 481    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 482    se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 483
 484    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 485       RADEON_STATECHANGE( rmesa, set );
 486       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 487    }
 488 }
 489
 490
 491 static void transition_to_hwtnl( GLcontext *ctx )
 492 {
 493    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 494    TNLcontext *tnl = TNL_CONTEXT(ctx);
 495    GLuint se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
 496                           RADEON_TEX1_W_ROUTING_USE_Q1);
 497
 498    if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
 499       RADEON_STATECHANGE( rmesa, set );
 500       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
 501       _tnl_need_projected_coords( ctx, GL_FALSE );
 502    }
 503
 504    radeonUpdateMaterial( ctx );
 505
 506    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 507
 508    if ( rmesa->dma.flush )
 509       rmesa->dma.flush( rmesa );
 510
 511    rmesa->dma.flush = NULL;
 512    rmesa->swtcl.vertex_format = 0;
 513
 514    if (rmesa->swtcl.indexed_verts.buf)
 515       radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
 516                               __FUNCTION__ );
 517
 518    if (RADEON_DEBUG & DEBUG_FALLBACKS)
 519       fprintf(stderr, "Radeon end tcl fallback\n");
 520 }
 521
 522 static char *fallbackStrings[] = {
 523    "Rasterization fallback",
 524    "Unfilled triangles",
 525    "Twosided lighting, differing materials",
 526    "Materials in VB (maybe between begin/end)",
 527    "Texgen unit 0",
 528    "Texgen unit 1",
 529    "Texgen unit 2",
 530    "User disable",
 531    "Fogcoord with separate specular lighting"
 532 };
 533
 534
 535 static char *getFallbackString(GLuint bit)
 536 {
 537    int i = 0;
 538    while (bit > 1) {
 539       i++;
 540       bit >>= 1;
 541    }
 542    return fallbackStrings[i];
 543 }
 544
 545
 546
 547 void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 548 {
 549    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 550    GLuint oldfallback = rmesa->TclFallback;
 551
 552    if (mode) {
 553       rmesa->TclFallback |= bit;
 554       if (oldfallback == 0) {
 555          if (RADEON_DEBUG & DEBUG_FALLBACKS)
 556             fprintf(stderr, "Radeon begin tcl fallback %s\n",
 557                     getFallbackString( bit ));
 558          transition_to_swtnl( ctx );
 559       }
 560    }
 561    else {
 562       rmesa->TclFallback &= ~bit;
 563       if (oldfallback == bit) {
 564          if (RADEON_DEBUG & DEBUG_FALLBACKS)
 565             fprintf(stderr, "Radeon end tcl fallback %s\n",
 566                     getFallbackString( bit ));
 567          transition_to_hwtnl( ctx );
 568       }
 569    }
 570 }