src/mesa/drivers/dri/radeon/radeon_tcl.c

   1 /**************************************************************************
   2
   3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   4                      VMware, Inc.
   5
   6 All Rights Reserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining
   9 a copy of this software and associated documentation files (the
  10 "Software"), to deal in the Software without restriction, including
  11 without limitation the rights to use, copy, modify, merge, publish,
  12 distribute, sublicense, and/or sell copies of the Software, and to
  13 permit persons to whom the Software is furnished to do so, subject to
  14 the following conditions:
  15
  16 The above copyright notice and this permission notice (including the
  17 next paragraph) shall be included in all copies or substantial
  18 portions of the Software.
  19
  20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27
  28 **************************************************************************/
  29
  30 /*
  31  * Authors:
  32  *   Keith Whitwell <keithw@vmware.com>
  33  */
  34
  35 #include "main/glheader.h"
  36 #include "util/imports.h"
  37 #include "main/mtypes.h"
  38 #include "main/light.h"
  39 #include "main/enums.h"
  40 #include "main/state.h"
  41
  42 #include "util/macros.h"
  43
  44 #include "vbo/vbo.h"
  45 #include "tnl/tnl.h"
  46 #include "tnl/t_pipeline.h"
  47
  48 #include "radeon_common.h"
  49 #include "radeon_context.h"
  50 #include "radeon_state.h"
  51 #include "radeon_ioctl.h"
  52 #include "radeon_tcl.h"
  53 #include "radeon_swtcl.h"
  54 #include "radeon_maos.h"
  55 #include "radeon_common_context.h"
  56
  57
  58
  59 /*
  60  * Render unclipped vertex buffers by emitting vertices directly to
  61  * dma buffers.  Use strip/fan hardware primitives where possible.
  62  * Try to simulate missing primitives with indexed vertices.
  63  */
  64 #define HAVE_POINTS      1
  65 #define HAVE_LINES       1
  66 #define HAVE_LINE_LOOP   0
  67 #define HAVE_LINE_STRIPS 1
  68 #define HAVE_TRIANGLES   1
  69 #define HAVE_TRI_STRIPS  1
  70 #define HAVE_TRI_FANS    1
  71 #define HAVE_QUADS       0
  72 #define HAVE_QUAD_STRIPS 0
  73 #define HAVE_POLYGONS    1
  74 #define HAVE_ELTS        1
  75
  76
  77 #define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
  78 #define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
  79 #define HW_LINE_LOOP        0
  80 #define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
  81 #define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
  82 #define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
  83 #define HW_TRIANGLE_STRIP_1 0
  84 #define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  85 #define HW_QUADS            0
  86 #define HW_QUAD_STRIP       0
  87 #define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  88
  89
  90 static GLboolean discrete_prim[0x10] = {
  91    0,                           /* 0 none */
  92    1,                           /* 1 points */
  93    1,                           /* 2 lines */
  94    0,                           /* 3 line_strip */
  95    1,                           /* 4 tri_list */
  96    0,                           /* 5 tri_fan */
  97    0,                           /* 6 tri_type2 */
  98    1,                           /* 7 rect list (unused) */
  99    1,                           /* 8 3vert point */
 100    1,                           /* 9 3vert line */
 101    0,
 102    0,
 103    0,
 104    0,
 105    0,
 106    0,
 107 };
 108
 109
 110 #define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
 111 #define ELT_TYPE  GLushort
 112
 113 #define ELT_INIT(prim, hw_prim) \
 114    radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
 115
 116 #define GET_MESA_ELTS() rmesa->tcl.Elts
 117
 118
 119 /* Don't really know how many elts will fit in what's left of cmdbuf,
 120  * as there is state to emit, etc:
 121  */
 122
 123 /* Testing on isosurf shows a maximum around here.  Don't know if it's
 124  * the card or driver or kernel module that is causing the behaviour.
 125  */
 126 #define GET_MAX_HW_ELTS() 300
 127
 128
 129 #define RESET_STIPPLE() do {                    \
 130    RADEON_STATECHANGE( rmesa, lin );            \
 131    radeonEmitState(&rmesa->radeon);                     \
 132 } while (0)
 133
 134 #define AUTO_STIPPLE( mode )  do {              \
 135    RADEON_STATECHANGE( rmesa, lin );            \
 136    if (mode)                                    \
 137       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
 138          RADEON_LINE_PATTERN_AUTO_RESET;        \
 139    else                                         \
 140       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
 141          ~RADEON_LINE_PATTERN_AUTO_RESET;       \
 142    radeonEmitState(&rmesa->radeon);             \
 143 } while (0)
 144
 145
 146
 147 #define ALLOC_ELTS(nr)  radeonAllocElts( rmesa, nr )
 148
 149 static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
 150 {
 151       if (rmesa->radeon.dma.flush)
 152          rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 153
 154       radeonEmitAOS( rmesa,
 155                      rmesa->radeon.tcl.aos_count, 0 );
 156
 157       return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
 158                                        rmesa->tcl.hw_primitive, nr );
 159 }
 160
 161 #define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
 162
 163
 164
 165 /* TODO: Try to extend existing primitive if both are identical,
 166  * discrete and there are no intervening state changes.  (Somewhat
 167  * duplicates changes to DrawArrays code)
 168  */
 169 static void radeonEmitPrim( struct gl_context *ctx,
 170                        GLenum prim,
 171                        GLuint hwprim,
 172                        GLuint start,
 173                        GLuint count)
 174 {
 175    r100ContextPtr rmesa = R100_CONTEXT( ctx );
 176    radeonTclPrimitive( ctx, prim, hwprim );
 177
 178    radeonEmitAOS( rmesa,
 179                   rmesa->radeon.tcl.aos_count,
 180                   start );
 181
 182    /* Why couldn't this packet have taken an offset param?
 183     */
 184    radeonEmitVbufPrim( rmesa,
 185                        rmesa->tcl.vertex_format,
 186                        rmesa->tcl.hw_primitive,
 187                        count - start );
 188 }
 189
 190 #define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
 191    radeonEmitPrim( ctx, prim, hwprim, start, count );           \
 192    (void) rmesa; } while (0)
 193
 194 #define MAX_CONVERSION_SIZE 40
 195
 196 /* Try & join small primitives
 197  */
 198 #if 0
 199 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
 200 #else
 201 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )                    \
 202   ((NR) < 20 ||                                                 \
 203    ((NR) < 40 &&                                                \
 204     rmesa->tcl.hw_primitive == (PRIM|                           \
 205                             RADEON_CP_VC_CNTL_PRIM_WALK_IND|    \
 206                             RADEON_CP_VC_CNTL_TCL_ENABLE)))
 207 #endif
 208
 209 #ifdef MESA_BIG_ENDIAN
 210 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
 211 #define EMIT_ELT(dest, offset, x) do {                          \
 212         int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 );  \
 213         GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
 214         (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x);     \
 215         (void)rmesa; } while (0)
 216 #else
 217 #define EMIT_ELT(dest, offset, x) do {                          \
 218         (dest)[offset] = (GLushort) (x);                        \
 219         (void)rmesa; } while (0)
 220 #endif
 221
 222 #define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
 223
 224
 225
 226 #define TAG(x) tcl_##x
 227 #include "tnl_dd/t_dd_dmatmp2.h"
 228
 229 /**********************************************************************/
 230 /*                          External entrypoints                     */
 231 /**********************************************************************/
 232
 233 void radeonEmitPrimitive( struct gl_context *ctx,
 234                           GLuint first,
 235                           GLuint last,
 236                           GLuint flags )
 237 {
 238    tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 239 }
 240
 241 void radeonEmitEltPrimitive( struct gl_context *ctx,
 242                              GLuint first,
 243                              GLuint last,
 244                              GLuint flags )
 245 {
 246    tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 247 }
 248
 249 void radeonTclPrimitive( struct gl_context *ctx,
 250                          GLenum prim,
 251                          int hw_prim )
 252 {
 253    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 254    GLuint se_cntl;
 255    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
 256
 257    radeon_prepare_render(&rmesa->radeon);
 258    if (rmesa->radeon.NewGLState)
 259       radeonValidateState( ctx );
 260
 261    if (newprim != rmesa->tcl.hw_primitive ||
 262        !discrete_prim[hw_prim&0xf]) {
 263       RADEON_NEWPRIM( rmesa );
 264       rmesa->tcl.hw_primitive = newprim;
 265    }
 266
 267    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 268    se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
 269
 270    if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
 271       se_cntl |= RADEON_FLAT_SHADE_VTX_0;
 272    else
 273       se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 274
 275    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 276       RADEON_STATECHANGE( rmesa, set );
 277       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 278    }
 279 }
 280
 281 /**
 282  * Predict total emit size for next rendering operation so there is no flush in middle of rendering
 283  * Prediction has to aim towards the best possible value that is worse than worst case scenario
 284  */
 285 static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
 286 {
 287   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 288   TNLcontext *tnl = TNL_CONTEXT(ctx);
 289   struct vertex_buffer *VB = &tnl->vb;
 290   GLuint space_required;
 291   GLuint state_size;
 292   GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
 293   int i;
 294   /* list of flags that are allocating aos object */
 295   const GLuint flags_to_check[] = {
 296     VERT_BIT_NORMAL,
 297     VERT_BIT_COLOR0,
 298     VERT_BIT_COLOR1,
 299     VERT_BIT_FOG
 300   };
 301   /* predict number of aos to emit */
 302   for (i=0; i < ARRAY_SIZE(flags_to_check); ++i)
 303   {
 304     if (inputs & flags_to_check[i])
 305       ++nr_aos;
 306   }
 307   for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
 308   {
 309     if (inputs & VERT_BIT_TEX(i))
 310       ++nr_aos;
 311   }
 312
 313   {
 314     /* count the prediction for state size */
 315     space_required = 0;
 316     state_size = radeonCountStateEmitSize( &rmesa->radeon );
 317     /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
 318     if (!rmesa->hw.tcl.dirty)
 319       state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
 320     /* predict size for elements */
 321     for (i = 0; i < VB->PrimitiveCount; ++i)
 322     {
 323       /* If primitive.count is less than MAX_CONVERSION_SIZE
 324          rendering code may decide convert to elts.
 325          In that case we have to make pessimistic prediction.
 326          and use larger of 2 paths. */
 327       const GLuint elts = ELTS_BUFSZ(nr_aos);
 328       const GLuint index = INDEX_BUFSZ;
 329       const GLuint vbuf = VBUF_BUFSZ;
 330       if (!VB->Primitive[i].count)
 331         continue;
 332       if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
 333           || vbuf > index + elts)
 334         space_required += vbuf;
 335       else
 336         space_required += index + elts;
 337       space_required += VB->Primitive[i].count * 3;
 338       space_required += AOS_BUFSZ(nr_aos);
 339     }
 340     space_required += SCISSOR_BUFSZ;
 341   }
 342   /* flush the buffer in case we need more than is left. */
 343   if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__))
 344     return space_required + radeonCountStateEmitSize( &rmesa->radeon );
 345   else
 346     return space_required + state_size;
 347 }
 348
 349 /**********************************************************************/
 350 /*                          Render pipeline stage                     */
 351 /**********************************************************************/
 352
 353
 354 /* TCL render.
 355  */
 356 static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
 357                                         struct tnl_pipeline_stage *stage )
 358 {
 359    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 360    TNLcontext *tnl = TNL_CONTEXT(ctx);
 361    struct vertex_buffer *VB = &tnl->vb;
 362    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 363    GLuint i;
 364    GLuint emit_end;
 365
 366    /* TODO: separate this from the swtnl pipeline
 367     */
 368    if (rmesa->radeon.TclFallback)
 369       return GL_TRUE;   /* fallback to software t&l */
 370
 371    if (VB->Count == 0)
 372       return GL_FALSE;
 373
 374    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
 375     * inputs.
 376     */
 377    if (ctx->Light.Enabled) {
 378       inputs |= VERT_BIT_NORMAL;
 379    }
 380
 381    if (_mesa_need_secondary_color(ctx)) {
 382       inputs |= VERT_BIT_COLOR1;
 383    }
 384
 385    if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
 386       inputs |= VERT_BIT_FOG;
 387    }
 388
 389    for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
 390       if (ctx->Texture.Unit[i]._Current) {
 391       /* TODO: probably should not emit texture coords when texgen is enabled */
 392          if (rmesa->TexGenNeedNormals[i]) {
 393             inputs |= VERT_BIT_NORMAL;
 394          }
 395          inputs |= VERT_BIT_TEX(i);
 396       }
 397    }
 398
 399    radeonReleaseArrays( ctx, ~0 );
 400    emit_end = radeonEnsureEmitSize( ctx, inputs )
 401      + rmesa->radeon.cmdbuf.cs->cdw;
 402    radeonEmitArrays( ctx, inputs );
 403
 404    rmesa->tcl.Elts = VB->Elts;
 405
 406    for (i = 0 ; i < VB->PrimitiveCount ; i++)
 407    {
 408       GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
 409       GLuint start = VB->Primitive[i].start;
 410       GLuint length = VB->Primitive[i].count;
 411
 412       if (!length)
 413          continue;
 414
 415       if (rmesa->tcl.Elts)
 416          radeonEmitEltPrimitive( ctx, start, start+length, prim );
 417       else
 418          radeonEmitPrimitive( ctx, start, start+length, prim );
 419    }
 420
 421    if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
 422       WARN_ONCE("Rendering was %d commands larger than predicted size."
 423           " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
 424
 425    return GL_FALSE;             /* finished the pipe */
 426 }
 427
 428
 429
 430 /* Initial state for tcl stage.
 431  */
 432 const struct tnl_pipeline_stage _radeon_tcl_stage =
 433 {
 434    "radeon render",
 435    NULL,
 436    NULL,
 437    NULL,
 438    NULL,
 439    radeon_run_tcl_render        /* run */
 440 };
 441
 442
 443
 444 /**********************************************************************/
 445 /*                 Validate state at pipeline start                   */
 446 /**********************************************************************/
 447
 448
 449 /*-----------------------------------------------------------------------
 450  * Manage TCL fallbacks
 451  */
 452
 453
 454 static void transition_to_swtnl( struct gl_context *ctx )
 455 {
 456    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 457    TNLcontext *tnl = TNL_CONTEXT(ctx);
 458    GLuint se_cntl;
 459
 460    RADEON_NEWPRIM( rmesa );
 461    rmesa->swtcl.vertex_format = 0;
 462
 463    radeonChooseVertexState( ctx );
 464    radeonChooseRenderState( ctx );
 465
 466    _tnl_validate_shine_tables( ctx );
 467
 468    tnl->Driver.NotifyMaterialChange =
 469       _tnl_validate_shine_tables;
 470
 471    radeonReleaseArrays( ctx, ~0 );
 472
 473    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 474    se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 475
 476    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 477       RADEON_STATECHANGE( rmesa, set );
 478       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 479    }
 480 }
 481
 482
 483 static void transition_to_hwtnl( struct gl_context *ctx )
 484 {
 485    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 486    TNLcontext *tnl = TNL_CONTEXT(ctx);
 487    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 488
 489    se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
 490                      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
 491                      RADEON_VTX_W0_IS_NOT_1_OVER_W0);
 492    se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
 493
 494    if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
 495       RADEON_STATECHANGE( rmesa, set );
 496       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
 497       _tnl_need_projected_coords( ctx, GL_FALSE );
 498    }
 499
 500    radeonUpdateMaterial( ctx );
 501
 502    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 503
 504    if ( rmesa->radeon.dma.flush )
 505       rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 506
 507    rmesa->radeon.dma.flush = NULL;
 508    rmesa->swtcl.vertex_format = 0;
 509
 510    //   if (rmesa->swtcl.indexed_verts.buf)
 511    //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
 512    //                         __func__ );
 513
 514    if (RADEON_DEBUG & RADEON_FALLBACKS)
 515       fprintf(stderr, "Radeon end tcl fallback\n");
 516 }
 517
 518 static char *fallbackStrings[] = {
 519    "Rasterization fallback",
 520    "Unfilled triangles",
 521    "Twosided lighting, differing materials",
 522    "Materials in VB (maybe between begin/end)",
 523    "Texgen unit 0",
 524    "Texgen unit 1",
 525    "Texgen unit 2",
 526    "User disable",
 527    "Fogcoord with separate specular lighting"
 528 };
 529
 530
 531 static char *getFallbackString(GLuint bit)
 532 {
 533    int i = 0;
 534    while (bit > 1) {
 535       i++;
 536       bit >>= 1;
 537    }
 538    return fallbackStrings[i];
 539 }
 540
 541
 542
 543 void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
 544 {
 545    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 546    GLuint oldfallback = rmesa->radeon.TclFallback;
 547
 548    if (mode) {
 549       rmesa->radeon.TclFallback |= bit;
 550       if (oldfallback == 0) {
 551          if (RADEON_DEBUG & RADEON_FALLBACKS)
 552             fprintf(stderr, "Radeon begin tcl fallback %s\n",
 553                     getFallbackString( bit ));
 554          transition_to_swtnl( ctx );
 555       }
 556    }
 557    else {
 558       rmesa->radeon.TclFallback &= ~bit;
 559       if (oldfallback == bit) {
 560          if (RADEON_DEBUG & RADEON_FALLBACKS)
 561             fprintf(stderr, "Radeon end tcl fallback %s\n",
 562                     getFallbackString( bit ));
 563          transition_to_hwtnl( ctx );
 564       }
 565    }
 566 }