src/mesa/drivers/dri/radeon/radeon_tcl.c

   1 /**************************************************************************
   2
   3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   4                      VMware, Inc.
   5
   6 All Rights Reserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining
   9 a copy of this software and associated documentation files (the
  10 "Software"), to deal in the Software without restriction, including
  11 without limitation the rights to use, copy, modify, merge, publish,
  12 distribute, sublicense, and/or sell copies of the Software, and to
  13 permit persons to whom the Software is furnished to do so, subject to
  14 the following conditions:
  15
  16 The above copyright notice and this permission notice (including the
  17 next paragraph) shall be included in all copies or substantial
  18 portions of the Software.
  19
  20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27
  28 **************************************************************************/
  29
  30 /*
  31  * Authors:
  32  *   Keith Whitwell <keithw@vmware.com>
  33  */
  34
  35 #include "main/glheader.h"
  36 #include "main/imports.h"
  37 #include "main/mtypes.h"
  38 #include "main/light.h"
  39 #include "main/enums.h"
  40 #include "main/state.h"
  41
  42 #include "vbo/vbo.h"
  43 #include "tnl/tnl.h"
  44 #include "tnl/t_pipeline.h"
  45
  46 #include "radeon_common.h"
  47 #include "radeon_context.h"
  48 #include "radeon_state.h"
  49 #include "radeon_ioctl.h"
  50 #include "radeon_tcl.h"
  51 #include "radeon_swtcl.h"
  52 #include "radeon_maos.h"
  53 #include "radeon_common_context.h"
  54
  55
  56
/*
 * Render unclipped vertex buffers by emitting vertices directly to
 * dma buffers.  Use strip/fan hardware primitives where possible.
 * Try to simulate missing primitives with indexed vertices.
 */

/* Primitive capability switches: 1 marks a primitive type as directly
 * available, 0 as unavailable.  Presumably consumed by the
 * tnl_dd/t_dd_dmatmp2.h template included further down -- confirm
 * against that header.
 */
#define HAVE_POINTS      1
#define HAVE_LINES       1
#define HAVE_LINE_LOOP   0
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES   1
#define HAVE_TRI_STRIPS  1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS    1
#define HAVE_QUADS       0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS    1
#define HAVE_ELTS        1


/* Hardware primitive code for each primitive type.  Every type whose
 * HAVE_* switch above is 0 gets a placeholder value of 0 here; polygons
 * reuse the triangle-fan code.
 */
#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
#define HW_LINE_LOOP        0
#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
#define HW_TRIANGLE_STRIP_1 0
#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
#define HW_QUADS            0
#define HW_QUAD_STRIP       0
#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
  87
  88
  89 static GLboolean discrete_prim[0x10] = {
  90    0,                           /* 0 none */
  91    1,                           /* 1 points */
  92    1,                           /* 2 lines */
  93    0,                           /* 3 line_strip */
  94    1,                           /* 4 tri_list */
  95    0,                           /* 5 tri_fan */
  96    0,                           /* 6 tri_type2 */
  97    1,                           /* 7 rect list (unused) */
  98    1,                           /* 8 3vert point */
  99    1,                           /* 9 3vert line */
 100    0,
 101    0,
 102    0,
 103    0,
 104    0,
 105    0,
 106 };
 107
 108
/* Local-variable prologue: fetches the r100 context from 'ctx' as 'rmesa'. */
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
/* Type of a single element index. */
#define ELT_TYPE  GLushort

/* Select the given primitive with the indexed-walk bit
 * (RADEON_CP_VC_CNTL_PRIM_WALK_IND) added to the hardware code.
 */
#define ELT_INIT(prim, hw_prim) \
   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )

/* Element array for the current draw; radeon_run_tcl_render stores
 * VB->Elts in rmesa->tcl.Elts before rendering.
 */
#define GET_MESA_ELTS() rmesa->tcl.Elts


/* Don't really know how many elts will fit in what's left of cmdbuf,
 * as there is state to emit, etc:
 */

/* Testing on isosurf shows a maximum around here.  Don't know if it's
 * the card or driver or kernel module that is causing the behaviour.
 */
#define GET_MAX_HW_ELTS() 300


/* Flag the 'lin' state atom via RADEON_STATECHANGE and emit state
 * immediately.  Presumably this restarts the line stipple pattern --
 * confirm against the hardware documentation.
 */
#define RESET_STIPPLE() do {                    \
   RADEON_STATECHANGE( rmesa, lin );            \
   radeonEmitState(&rmesa->radeon);                     \
} while (0)

/* Set (mode != 0) or clear (mode == 0) RADEON_LINE_PATTERN_AUTO_RESET in
 * the LIN_RE_LINE_PATTERN command word, then emit state immediately.
 */
#define AUTO_STIPPLE( mode )  do {              \
   RADEON_STATECHANGE( rmesa, lin );            \
   if (mode)                                    \
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
         RADEON_LINE_PATTERN_AUTO_RESET;        \
   else                                         \
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
         ~RADEON_LINE_PATTERN_AUTO_RESET;       \
   radeonEmitState(&rmesa->radeon);             \
} while (0)



/* Forward an element allocation request to radeonAllocElts, using the
 * 'rmesa' that is in scope at the point of use.
 */
#define ALLOC_ELTS(nr)  radeonAllocElts( rmesa, nr )
 147
 148 static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
 149 {
 150       if (rmesa->radeon.dma.flush)
 151          rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 152
 153       radeonEmitAOS( rmesa,
 154                      rmesa->radeon.tcl.aos_count, 0 );
 155
 156       return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
 157                                        rmesa->tcl.hw_primitive, nr );
 158 }
 159
 160 #define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
 161
 162
 163
 164 /* TODO: Try to extend existing primitive if both are identical,
 165  * discrete and there are no intervening state changes.  (Somewhat
 166  * duplicates changes to DrawArrays code)
 167  */
 168 static void radeonEmitPrim( struct gl_context *ctx,
 169                        GLenum prim,
 170                        GLuint hwprim,
 171                        GLuint start,
 172                        GLuint count)
 173 {
 174    r100ContextPtr rmesa = R100_CONTEXT( ctx );
 175    radeonTclPrimitive( ctx, prim, hwprim );
 176
 177    radeonEmitAOS( rmesa,
 178                   rmesa->radeon.tcl.aos_count,
 179                   start );
 180
 181    /* Why couldn't this packet have taken an offset param?
 182     */
 183    radeonEmitVbufPrim( rmesa,
 184                        rmesa->tcl.vertex_format,
 185                        rmesa->tcl.hw_primitive,
 186                        count - start );
 187 }
 188
/* Emit a non-indexed primitive through radeonEmitPrim.  The (void) cast
 * references 'rmesa' so that it counts as used at the expansion site.
 */
#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
   radeonEmitPrim( ctx, prim, hwprim, start, count );           \
   (void) rmesa; } while (0)

/* Primitives with fewer vertices than this may be converted to elts by
 * the rendering code; radeonEnsureEmitSize budgets for the larger of the
 * two paths in that case.
 */
#define MAX_CONVERSION_SIZE 40

/* Try & join small primitives
 */
#if 0
/* Disabled variant: never prefer a discrete indexed primitive. */
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
#else
/* True when NR is below 20, or when NR is below 40 and the current
 * hardware primitive already equals PRIM with the indexed-walk and
 * TCL-enable bits set.
 */
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )                    \
  ((NR) < 20 ||                                                 \
   ((NR) < 40 &&                                                \
    rmesa->tcl.hw_primitive == (PRIM|                           \
                            RADEON_CP_VC_CNTL_PRIM_WALK_IND|    \
                            RADEON_CP_VC_CNTL_TCL_ENABLE)))
#endif
 207
 208 #ifdef MESA_BIG_ENDIAN
 209 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
 210 #define EMIT_ELT(dest, offset, x) do {                          \
 211         int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 );  \
 212         GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
 213         (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x);     \
 214         (void)rmesa; } while (0)
 215 #else
 216 #define EMIT_ELT(dest, offset, x) do {                          \
 217         (dest)[offset] = (GLushort) (x);                        \
 218         (void)rmesa; } while (0)
 219 #endif
 220
 221 #define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
 222
 223
 224
 225 #define TAG(x) tcl_##x
 226 #include "tnl_dd/t_dd_dmatmp2.h"
 227
 228 /**********************************************************************/
 229 /*                          External entrypoints                     */
 230 /**********************************************************************/
 231
 232 void radeonEmitPrimitive( struct gl_context *ctx,
 233                           GLuint first,
 234                           GLuint last,
 235                           GLuint flags )
 236 {
 237    tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 238 }
 239
 240 void radeonEmitEltPrimitive( struct gl_context *ctx,
 241                              GLuint first,
 242                              GLuint last,
 243                              GLuint flags )
 244 {
 245    tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 246 }
 247
 248 void radeonTclPrimitive( struct gl_context *ctx,
 249                          GLenum prim,
 250                          int hw_prim )
 251 {
 252    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 253    GLuint se_cntl;
 254    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
 255
 256    radeon_prepare_render(&rmesa->radeon);
 257    if (rmesa->radeon.NewGLState)
 258       radeonValidateState( ctx );
 259
 260    if (newprim != rmesa->tcl.hw_primitive ||
 261        !discrete_prim[hw_prim&0xf]) {
 262       RADEON_NEWPRIM( rmesa );
 263       rmesa->tcl.hw_primitive = newprim;
 264    }
 265
 266    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 267    se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
 268
 269    if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
 270       se_cntl |= RADEON_FLAT_SHADE_VTX_0;
 271    else
 272       se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 273
 274    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 275       RADEON_STATECHANGE( rmesa, set );
 276       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 277    }
 278 }
 279
 280 /**
 281  * Predict total emit size for next rendering operation so there is no flush in middle of rendering
 282  * Prediction has to aim towards the best possible value that is worse than worst case scenario
 283  */
 284 static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
 285 {
 286   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 287   TNLcontext *tnl = TNL_CONTEXT(ctx);
 288   struct vertex_buffer *VB = &tnl->vb;
 289   GLuint space_required;
 290   GLuint state_size;
 291   GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
 292   int i;
 293   /* list of flags that are allocating aos object */
 294   const GLuint flags_to_check[] = {
 295     VERT_BIT_NORMAL,
 296     VERT_BIT_COLOR0,
 297     VERT_BIT_COLOR1,
 298     VERT_BIT_FOG
 299   };
 300   /* predict number of aos to emit */
 301   for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
 302   {
 303     if (inputs & flags_to_check[i])
 304       ++nr_aos;
 305   }
 306   for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
 307   {
 308     if (inputs & VERT_BIT_TEX(i))
 309       ++nr_aos;
 310   }
 311
 312   {
 313     /* count the prediction for state size */
 314     space_required = 0;
 315     state_size = radeonCountStateEmitSize( &rmesa->radeon );
 316     /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
 317     if (!rmesa->hw.tcl.dirty)
 318       state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
 319     /* predict size for elements */
 320     for (i = 0; i < VB->PrimitiveCount; ++i)
 321     {
 322       /* If primitive.count is less than MAX_CONVERSION_SIZE
 323          rendering code may decide convert to elts.
 324          In that case we have to make pessimistic prediction.
 325          and use larger of 2 paths. */
 326       const GLuint elts = ELTS_BUFSZ(nr_aos);
 327       const GLuint index = INDEX_BUFSZ;
 328       const GLuint vbuf = VBUF_BUFSZ;
 329       if (!VB->Primitive[i].count)
 330         continue;
 331       if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
 332           || vbuf > index + elts)
 333         space_required += vbuf;
 334       else
 335         space_required += index + elts;
 336       space_required += VB->Primitive[i].count * 3;
 337       space_required += AOS_BUFSZ(nr_aos);
 338     }
 339     space_required += SCISSOR_BUFSZ;
 340   }
 341   /* flush the buffer in case we need more than is left. */
 342   if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__))
 343     return space_required + radeonCountStateEmitSize( &rmesa->radeon );
 344   else
 345     return space_required + state_size;
 346 }
 347
 348 /**********************************************************************/
 349 /*                          Render pipeline stage                     */
 350 /**********************************************************************/
 351
 352
 353 /* TCL render.
 354  */
 355 static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
 356                                         struct tnl_pipeline_stage *stage )
 357 {
 358    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 359    TNLcontext *tnl = TNL_CONTEXT(ctx);
 360    struct vertex_buffer *VB = &tnl->vb;
 361    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 362    GLuint i;
 363    GLuint emit_end;
 364
 365    /* TODO: separate this from the swtnl pipeline
 366     */
 367    if (rmesa->radeon.TclFallback)
 368       return GL_TRUE;   /* fallback to software t&l */
 369
 370    if (VB->Count == 0)
 371       return GL_FALSE;
 372
 373    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
 374     * inputs.
 375     */
 376    if (ctx->Light.Enabled) {
 377       inputs |= VERT_BIT_NORMAL;
 378    }
 379
 380    if (_mesa_need_secondary_color(ctx)) {
 381       inputs |= VERT_BIT_COLOR1;
 382    }
 383
 384    if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
 385       inputs |= VERT_BIT_FOG;
 386    }
 387
 388    for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
 389       if (ctx->Texture.Unit[i]._Current) {
 390       /* TODO: probably should not emit texture coords when texgen is enabled */
 391          if (rmesa->TexGenNeedNormals[i]) {
 392             inputs |= VERT_BIT_NORMAL;
 393          }
 394          inputs |= VERT_BIT_TEX(i);
 395       }
 396    }
 397
 398    radeonReleaseArrays( ctx, ~0 );
 399    emit_end = radeonEnsureEmitSize( ctx, inputs )
 400      + rmesa->radeon.cmdbuf.cs->cdw;
 401    radeonEmitArrays( ctx, inputs );
 402
 403    rmesa->tcl.Elts = VB->Elts;
 404
 405    for (i = 0 ; i < VB->PrimitiveCount ; i++)
 406    {
 407       GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
 408       GLuint start = VB->Primitive[i].start;
 409       GLuint length = VB->Primitive[i].count;
 410
 411       if (!length)
 412          continue;
 413
 414       if (rmesa->tcl.Elts)
 415          radeonEmitEltPrimitive( ctx, start, start+length, prim );
 416       else
 417          radeonEmitPrimitive( ctx, start, start+length, prim );
 418    }
 419
 420    if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
 421       WARN_ONCE("Rendering was %d commands larger than predicted size."
 422           " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
 423
 424    return GL_FALSE;             /* finished the pipe */
 425 }
 426
 427
 428
/* Initial state for tcl stage.
 *
 * Positional initializer: the first member is the stage's name string and
 * the last one is the run callback.  The four members in between are left
 * NULL; their meaning is defined by struct tnl_pipeline_stage in
 * tnl/t_pipeline.h -- check there before adding or reordering entries.
 */
const struct tnl_pipeline_stage _radeon_tcl_stage =
{
   "radeon render",
   NULL,
   NULL,
   NULL,
   NULL,
   radeon_run_tcl_render        /* run */
};
 440
 441
 442
 443 /**********************************************************************/
 444 /*                 Validate state at pipeline start                   */
 445 /**********************************************************************/
 446
 447
 448 /*-----------------------------------------------------------------------
 449  * Manage TCL fallbacks
 450  */
 451
 452
 453 static void transition_to_swtnl( struct gl_context *ctx )
 454 {
 455    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 456    TNLcontext *tnl = TNL_CONTEXT(ctx);
 457    GLuint se_cntl;
 458
 459    RADEON_NEWPRIM( rmesa );
 460    rmesa->swtcl.vertex_format = 0;
 461
 462    radeonChooseVertexState( ctx );
 463    radeonChooseRenderState( ctx );
 464
 465    _tnl_validate_shine_tables( ctx );
 466
 467    tnl->Driver.NotifyMaterialChange =
 468       _tnl_validate_shine_tables;
 469
 470    radeonReleaseArrays( ctx, ~0 );
 471
 472    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
 473    se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 474
 475    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
 476       RADEON_STATECHANGE( rmesa, set );
 477       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
 478    }
 479 }
 480
 481
 482 static void transition_to_hwtnl( struct gl_context *ctx )
 483 {
 484    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 485    TNLcontext *tnl = TNL_CONTEXT(ctx);
 486    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 487
 488    se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
 489                      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
 490                      RADEON_VTX_W0_IS_NOT_1_OVER_W0);
 491    se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
 492
 493    if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
 494       RADEON_STATECHANGE( rmesa, set );
 495       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
 496       _tnl_need_projected_coords( ctx, GL_FALSE );
 497    }
 498
 499    radeonUpdateMaterial( ctx );
 500
 501    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 502
 503    if ( rmesa->radeon.dma.flush )
 504       rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 505
 506    rmesa->radeon.dma.flush = NULL;
 507    rmesa->swtcl.vertex_format = 0;
 508
 509    //   if (rmesa->swtcl.indexed_verts.buf)
 510    //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
 511    //                         __func__ );
 512
 513    if (RADEON_DEBUG & RADEON_FALLBACKS)
 514       fprintf(stderr, "Radeon end tcl fallback\n");
 515 }
 516
 517 static char *fallbackStrings[] = {
 518    "Rasterization fallback",
 519    "Unfilled triangles",
 520    "Twosided lighting, differing materials",
 521    "Materials in VB (maybe between begin/end)",
 522    "Texgen unit 0",
 523    "Texgen unit 1",
 524    "Texgen unit 2",
 525    "User disable",
 526    "Fogcoord with separate specular lighting"
 527 };
 528
 529
 530 static char *getFallbackString(GLuint bit)
 531 {
 532    int i = 0;
 533    while (bit > 1) {
 534       i++;
 535       bit >>= 1;
 536    }
 537    return fallbackStrings[i];
 538 }
 539
 540
 541
 542 void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
 543 {
 544    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 545    GLuint oldfallback = rmesa->radeon.TclFallback;
 546
 547    if (mode) {
 548       rmesa->radeon.TclFallback |= bit;
 549       if (oldfallback == 0) {
 550          if (RADEON_DEBUG & RADEON_FALLBACKS)
 551             fprintf(stderr, "Radeon begin tcl fallback %s\n",
 552                     getFallbackString( bit ));
 553          transition_to_swtnl( ctx );
 554       }
 555    }
 556    else {
 557       rmesa->radeon.TclFallback &= ~bit;
 558       if (oldfallback == bit) {
 559          if (RADEON_DEBUG & RADEON_FALLBACKS)
 560             fprintf(stderr, "Radeon end tcl fallback %s\n",
 561                     getFallbackString( bit ));
 562          transition_to_hwtnl( ctx );
 563       }
 564    }
 565 }