src/mesa/drivers/dri/radeon/radeon_tcl.c

   1 /**************************************************************************
   2
   3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   4                      Tungsten Graphics Inc., Austin, Texas.
   5
   6 All Rights Reserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining
   9 a copy of this software and associated documentation files (the
  10 "Software"), to deal in the Software without restriction, including
  11 without limitation the rights to use, copy, modify, merge, publish,
  12 distribute, sublicense, and/or sell copies of the Software, and to
  13 permit persons to whom the Software is furnished to do so, subject to
  14 the following conditions:
  15
  16 The above copyright notice and this permission notice (including the
  17 next paragraph) shall be included in all copies or substantial
  18 portions of the Software.
  19
  20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27
  28 **************************************************************************/
  29
  30 /*
  31  * Authors:
  32  *   Keith Whitwell <keith@tungstengraphics.com>
  33  */
  34
  35 #include "main/glheader.h"
  36 #include "main/imports.h"
  37 #include "main/light.h"
  38 #include "main/mtypes.h"
  39 #include "main/enums.h"
  40
  41 #include "vbo/vbo.h"
  42 #include "tnl/tnl.h"
  43 #include "tnl/t_pipeline.h"
  44
  45 #include "radeon_context.h"
  46 #include "radeon_state.h"
  47 #include "radeon_ioctl.h"
  48 #include "radeon_tex.h"
  49 #include "radeon_tcl.h"
  50 #include "radeon_swtcl.h"
  51 #include "radeon_maos.h"
  52
  53
  54
  55 /*
  56  * Render unclipped vertex buffers by emitting vertices directly to
  57  * dma buffers.  Use strip/fan hardware primitives where possible.
  58  * Try to simulate missing primitives with indexed vertices.
  59  */
  60 #define HAVE_POINTS      1
  61 #define HAVE_LINES       1
  62 #define HAVE_LINE_LOOP   0
  63 #define HAVE_LINE_STRIPS 1
  64 #define HAVE_TRIANGLES   1
  65 #define HAVE_TRI_STRIPS  1
  66 #define HAVE_TRI_STRIP_1 0
  67 #define HAVE_TRI_FANS    1
  68 #define HAVE_QUADS       0
  69 #define HAVE_QUAD_STRIPS 0
  70 #define HAVE_POLYGONS    1
  71 #define HAVE_ELTS        1
  72
  73
  74 #define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
  75 #define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
  76 #define HW_LINE_LOOP        0
  77 #define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
  78 #define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
  79 #define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
  80 #define HW_TRIANGLE_STRIP_1 0
  81 #define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  82 #define HW_QUADS            0
  83 #define HW_QUAD_STRIP       0
  84 #define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  85
  86
  87 static GLboolean discrete_prim[0x10] = {
  88    0,                           /* 0 none */
  89    1,                           /* 1 points */
  90    1,                           /* 2 lines */
  91    0,                           /* 3 line_strip */
  92    1,                           /* 4 tri_list */
  93    0,                           /* 5 tri_fan */
  94    0,                           /* 6 tri_type2 */
  95    1,                           /* 7 rect list (unused) */
  96    1,                           /* 8 3vert point */
  97    1,                           /* 9 3vert line */
  98    0,
  99    0,
 100    0,
 101    0,
 102    0,
 103    0,
 104 };
 105
 106
 107 #define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
 108 #define ELT_TYPE  GLushort
 109
 110 #define ELT_INIT(prim, hw_prim) \
 111    radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
 112
 113 #define GET_MESA_ELTS() rmesa->tcl.Elts
 114
 115
 116 /* Don't really know how many elts will fit in what's left of cmdbuf,
 117  * as there is state to emit, etc:
 118  */
 119
 120 /* Testing on isosurf shows a maximum around here.  Don't know if it's
 121  * the card or driver or kernel module that is causing the behaviour.
 122  */
 123 #define GET_MAX_HW_ELTS() 300
 124
 125
 126 #define RESET_STIPPLE() do {                    \
 127    RADEON_STATECHANGE( rmesa, lin );            \
 128    radeonEmitState( rmesa );                    \
 129 } while (0)
 130
 131 #define AUTO_STIPPLE( mode )  do {              \
 132    RADEON_STATECHANGE( rmesa, lin );            \
 133    if (mode)                                    \
 134       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
 135          RADEON_LINE_PATTERN_AUTO_RESET;        \
 136    else                                         \
 137       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
 138          ~RADEON_LINE_PATTERN_AUTO_RESET;       \
 139    radeonEmitState( rmesa );                    \
 140 } while (0)
 141
 142
 143
 144 #define ALLOC_ELTS(nr)  radeonAllocElts( rmesa, nr )
 145
 146 static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
 147 {
 148    if (rmesa->dma.flush)
 149       rmesa->dma.flush( rmesa );
 150
 151    radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
 152                            rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
 153
 154    radeonEmitAOS( rmesa,
 155                 rmesa->tcl.aos_components,
 156                 rmesa->tcl.nr_aos_components, 0 );
 157
 158    return radeonAllocEltsOpenEnded( rmesa,
 159                                     rmesa->tcl.vertex_format,
 160                                     rmesa->tcl.hw_primitive, nr );
 161 }
 162
 163 #define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
 164
 165
 166
 167 /* TODO: Try to extend existing primitive if both are identical,
 168  * discrete and there are no intervening state changes.  (Somewhat
 169  * duplicates changes to DrawArrays code)
 170  */
 171 static void radeonEmitPrim( GLcontext *ctx,
 172                        GLenum prim,
 173                        GLuint hwprim,
 174                        GLuint start,
 175                        GLuint count)
 176 {
 177    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 178    radeonTclPrimitive( ctx, prim, hwprim );
 179
 180    radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
 181                             rmesa->hw.max_state_size + VBUF_BUFSZ );
 182
 183    radeonEmitAOS( rmesa,
 184                   rmesa->tcl.aos_components,
 185                   rmesa->tcl.nr_aos_components,
 186                   start );
 187
 188    /* Why couldn't this packet have taken an offset param?
 189     */
 190    radeonEmitVbufPrim( rmesa,
 191                        rmesa->tcl.vertex_format,
 192                        rmesa->tcl.hw_primitive,
 193                        count - start );
 194 }
 195
 196 #define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
 197    radeonEmitPrim( ctx, prim, hwprim, start, count );           \
 198    (void) rmesa; } while (0)
 199
 200 /* Try & join small primitives
 201  */
 202 #if 0
 203 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
 204 #else
 205 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )                    \
 206   ((NR) < 20 ||                                                 \
 207    ((NR) < 40 &&                                                \
 208     rmesa->tcl.hw_primitive == (PRIM|                           \
 209                             RADEON_CP_VC_CNTL_PRIM_WALK_IND|    \
 210                             RADEON_CP_VC_CNTL_TCL_ENABLE)))
 211 #endif
 212
 213 #ifdef MESA_BIG_ENDIAN
 214 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
 215 #define EMIT_ELT(dest, offset, x) do {                          \
 216         int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 );     \
 217         GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 );    \
 218         (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x);     \
 219         (void)rmesa; } while (0)
 220 #else
 221 #define EMIT_ELT(dest, offset, x) do {                          \
 222         (dest)[offset] = (GLushort) (x);                        \
 223         (void)rmesa; } while (0)
 224 #endif
 225
 226 #define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
 227
 228
 229
 230 #define TAG(x) tcl_##x
 231 #include "tnl_dd/t_dd_dmatmp2.h"
 232
 233 /**********************************************************************/
 234 /*                          External entrypoints                     */
 235 /**********************************************************************/
 236
 237 void radeonEmitPrimitive( GLcontext *ctx,
 238                           GLuint first,
 239                           GLuint last,
 240                           GLuint flags )
 241 {
 242    tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 243 }
 244
 245 void radeonEmitEltPrimitive( GLcontext *ctx,
 246                              GLuint first,
 247                              GLuint last,
 248                              GLuint flags )
 249 {
 250    tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 251 }
 252
 253 void radeonTclPrimitive( GLcontext *ctx,
 254                          GLenum prim,
 255                          int hw_prim )
 256 {
 257    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 258    GLuint se_cntl;
 259    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
 260
 261    if (newprim != rmesa->tcl.hw_primitive ||
 262        !discrete_prim[hw_prim&0xf]) {
 263       RADEON_NEWPRIM( rmesa );
 264       rmesa->tcl.hw_primitive = newprim;
 265    }
 266
 267    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 268    se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
 269
 270    if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE))
 271       se_cntl |= RADEON_FLAT_SHADE_VTX_0;
 272    else
 273       se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 274
 275    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 276       RADEON_STATECHANGE( rmesa, set );
 277       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 278    }
 279 }
 280
 281 /**********************************************************************/
 282 /*             Fog blend factor computation for hw tcl                */
 283 /*             same calculation used as in t_vb_fog.c                 */
 284 /**********************************************************************/
 285
 286 #define FOG_EXP_TABLE_SIZE 256
 287 #define FOG_MAX (10.0)
 288 #define EXP_FOG_MAX .0006595
 289 #define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
 290 static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
 291
 292 #if 1
 293 #define NEG_EXP( result, narg )                                         \
 294 do {                                                                    \
 295    GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR));                       \
 296    GLint k = (GLint) f;                                                 \
 297    if (k > FOG_EXP_TABLE_SIZE-2)                                        \
 298       result = (GLfloat) EXP_FOG_MAX;                                   \
 299    else                                                                 \
 300       result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]);      \
 301 } while (0)
 302 #else
 303 #define NEG_EXP( result, narg )                                 \
 304 do {                                                            \
 305    result = exp(-narg);                                         \
 306 } while (0)
 307 #endif
 308
 309
 310 /**
 311  * Initialize the exp_table[] lookup table for approximating exp().
 312  */
 313 void
 314 radeonInitStaticFogData( void )
 315 {
 316    GLfloat f = 0.0F;
 317    GLint i = 0;
 318    for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) {
 319       exp_table[i] = (GLfloat) exp(-f);
 320    }
 321 }
 322
 323
 324 /**
 325  * Compute per-vertex fog blend factors from fog coordinates by
 326  * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
 327  * Fog coordinates are distances from the eye (typically between the
 328  * near and far clip plane distances).
 329  * Note the fog (eye Z) coords may be negative so we use ABS(z) below.
 330  * Fog blend factors are in the range [0,1].
 331  */
 332 float
 333 radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
 334 {
 335    GLfloat end  = ctx->Fog.End;
 336    GLfloat d, temp;
 337    const GLfloat z = FABSF(fogcoord);
 338
 339    switch (ctx->Fog.Mode) {
 340    case GL_LINEAR:
 341       if (ctx->Fog.Start == ctx->Fog.End)
 342          d = 1.0F;
 343       else
 344          d = 1.0F / (ctx->Fog.End - ctx->Fog.Start);
 345       temp = (end - z) * d;
 346       return CLAMP(temp, 0.0F, 1.0F);
 347       break;
 348    case GL_EXP:
 349       d = ctx->Fog.Density;
 350       NEG_EXP( temp, d * z );
 351       return temp;
 352       break;
 353    case GL_EXP2:
 354       d = ctx->Fog.Density*ctx->Fog.Density;
 355       NEG_EXP( temp, d * z * z );
 356       return temp;
 357       break;
 358    default:
 359       _mesa_problem(ctx, "Bad fog mode in make_fog_coord");
 360       return 0;
 361    }
 362 }
 363
 364 /**********************************************************************/
 365 /*                          Render pipeline stage                     */
 366 /**********************************************************************/
 367
 368
 369 /* TCL render.
 370  */
 371 static GLboolean radeon_run_tcl_render( GLcontext *ctx,
 372                                         struct tnl_pipeline_stage *stage )
 373 {
 374    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 375    TNLcontext *tnl = TNL_CONTEXT(ctx);
 376    struct vertex_buffer *VB = &tnl->vb;
 377    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 378    GLuint i;
 379
 380    /* TODO: separate this from the swtnl pipeline
 381     */
 382    if (rmesa->TclFallback)
 383       return GL_TRUE;   /* fallback to software t&l */
 384
 385    if (VB->Count == 0)
 386       return GL_FALSE;
 387
 388    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
 389     * inputs.
 390     */
 391    if (ctx->Light.Enabled) {
 392       inputs |= VERT_BIT_NORMAL;
 393    }
 394
 395    if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
 396       inputs |= VERT_BIT_COLOR1;
 397    }
 398
 399    if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
 400       inputs |= VERT_BIT_FOG;
 401    }
 402
 403    for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
 404       if (ctx->Texture.Unit[i]._ReallyEnabled) {
 405       /* TODO: probably should not emit texture coords when texgen is enabled */
 406          if (rmesa->TexGenNeedNormals[i]) {
 407             inputs |= VERT_BIT_NORMAL;
 408          }
 409          inputs |= VERT_BIT_TEX(i);
 410       }
 411    }
 412
 413    radeonReleaseArrays( ctx, ~0 );
 414    radeonEmitArrays( ctx, inputs );
 415
 416    rmesa->tcl.Elts = VB->Elts;
 417
 418    for (i = 0 ; i < VB->PrimitiveCount ; i++)
 419    {
 420       GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
 421       GLuint start = VB->Primitive[i].start;
 422       GLuint length = VB->Primitive[i].count;
 423
 424       if (!length)
 425          continue;
 426
 427       if (rmesa->tcl.Elts)
 428          radeonEmitEltPrimitive( ctx, start, start+length, prim );
 429       else
 430          radeonEmitPrimitive( ctx, start, start+length, prim );
 431    }
 432
 433    return GL_FALSE;             /* finished the pipe */
 434 }
 435
 436
 437
 438 /* Initial state for tcl stage.
 439  */
 440 const struct tnl_pipeline_stage _radeon_tcl_stage =
 441 {
 442    "radeon render",
 443    NULL,
 444    NULL,
 445    NULL,
 446    NULL,
 447    radeon_run_tcl_render        /* run */
 448 };
 449
 450
 451
 452 /**********************************************************************/
 453 /*                 Validate state at pipeline start                   */
 454 /**********************************************************************/
 455
 456
 457 /*-----------------------------------------------------------------------
 458  * Manage TCL fallbacks
 459  */
 460
 461
 462 static void transition_to_swtnl( GLcontext *ctx )
 463 {
 464    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 465    TNLcontext *tnl = TNL_CONTEXT(ctx);
 466    GLuint se_cntl;
 467
 468    RADEON_NEWPRIM( rmesa );
 469    rmesa->swtcl.vertex_format = 0;
 470
 471    radeonChooseVertexState( ctx );
 472    radeonChooseRenderState( ctx );
 473
 474    _mesa_validate_all_lighting_tables( ctx );
 475
 476    tnl->Driver.NotifyMaterialChange =
 477       _mesa_validate_all_lighting_tables;
 478
 479    radeonReleaseArrays( ctx, ~0 );
 480
 481    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 482    se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 483
 484    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 485       RADEON_STATECHANGE( rmesa, set );
 486       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 487    }
 488 }
 489
 490
 491 static void transition_to_hwtnl( GLcontext *ctx )
 492 {
 493    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 494    TNLcontext *tnl = TNL_CONTEXT(ctx);
 495    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 496
 497    se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
 498                      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
 499                      RADEON_VTX_W0_IS_NOT_1_OVER_W0);
 500    se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
 501
 502    if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
 503       RADEON_STATECHANGE( rmesa, set );
 504       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
 505       _tnl_need_projected_coords( ctx, GL_FALSE );
 506    }
 507
 508    radeonUpdateMaterial( ctx );
 509
 510    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 511
 512    if ( rmesa->dma.flush )
 513       rmesa->dma.flush( rmesa );
 514
 515    rmesa->dma.flush = NULL;
 516    rmesa->swtcl.vertex_format = 0;
 517
 518    if (rmesa->swtcl.indexed_verts.buf)
 519       radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
 520                               __FUNCTION__ );
 521
 522    if (RADEON_DEBUG & DEBUG_FALLBACKS)
 523       fprintf(stderr, "Radeon end tcl fallback\n");
 524 }
 525
 526 static char *fallbackStrings[] = {
 527    "Rasterization fallback",
 528    "Unfilled triangles",
 529    "Twosided lighting, differing materials",
 530    "Materials in VB (maybe between begin/end)",
 531    "Texgen unit 0",
 532    "Texgen unit 1",
 533    "Texgen unit 2",
 534    "User disable",
 535    "Fogcoord with separate specular lighting"
 536 };
 537
 538
 539 static char *getFallbackString(GLuint bit)
 540 {
 541    int i = 0;
 542    while (bit > 1) {
 543       i++;
 544       bit >>= 1;
 545    }
 546    return fallbackStrings[i];
 547 }
 548
 549
 550
 551 void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 552 {
 553    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 554    GLuint oldfallback = rmesa->TclFallback;
 555
 556    if (mode) {
 557       rmesa->TclFallback |= bit;
 558       if (oldfallback == 0) {
 559          if (RADEON_DEBUG & DEBUG_FALLBACKS)
 560             fprintf(stderr, "Radeon begin tcl fallback %s\n",
 561                     getFallbackString( bit ));
 562          transition_to_swtnl( ctx );
 563       }
 564    }
 565    else {
 566       rmesa->TclFallback &= ~bit;
 567       if (oldfallback == bit) {
 568          if (RADEON_DEBUG & DEBUG_FALLBACKS)
 569             fprintf(stderr, "Radeon end tcl fallback %s\n",
 570                     getFallbackString( bit ));
 571          transition_to_hwtnl( ctx );
 572       }
 573    }
 574 }