r300: always route 4 texcoord components to RS
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
72 #include "r300_reg.h"
73 #include "r300_tex.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog_common.h"
76
77 extern int future_hw_tcl_on;
78
79 /**
80 * \brief Convert a OpenGL primitive type into a R300 primitive type.
81 */
82 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
83 {
84 switch (prim & PRIM_MODE_MASK) {
85 case GL_POINTS:
86 return R300_VAP_VF_CNTL__PRIM_POINTS;
87 break;
88 case GL_LINES:
89 return R300_VAP_VF_CNTL__PRIM_LINES;
90 break;
91 case GL_LINE_STRIP:
92 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
93 break;
94 case GL_LINE_LOOP:
95 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
96 break;
97 case GL_TRIANGLES:
98 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
99 break;
100 case GL_TRIANGLE_STRIP:
101 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
102 break;
103 case GL_TRIANGLE_FAN:
104 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
105 break;
106 case GL_QUADS:
107 return R300_VAP_VF_CNTL__PRIM_QUADS;
108 break;
109 case GL_QUAD_STRIP:
110 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
111 break;
112 case GL_POLYGON:
113 return R300_VAP_VF_CNTL__PRIM_POLYGON;
114 break;
115 default:
116 assert(0);
117 return -1;
118 break;
119 }
120 }
121
122 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
123 {
124 int verts_off = 0;
125
126 switch (prim & PRIM_MODE_MASK) {
127 case GL_POINTS:
128 verts_off = 0;
129 break;
130 case GL_LINES:
131 verts_off = num_verts % 2;
132 break;
133 case GL_LINE_STRIP:
134 if (num_verts < 2)
135 verts_off = num_verts;
136 break;
137 case GL_LINE_LOOP:
138 if (num_verts < 2)
139 verts_off = num_verts;
140 break;
141 case GL_TRIANGLES:
142 verts_off = num_verts % 3;
143 break;
144 case GL_TRIANGLE_STRIP:
145 if (num_verts < 3)
146 verts_off = num_verts;
147 break;
148 case GL_TRIANGLE_FAN:
149 if (num_verts < 3)
150 verts_off = num_verts;
151 break;
152 case GL_QUADS:
153 verts_off = num_verts % 4;
154 break;
155 case GL_QUAD_STRIP:
156 if (num_verts < 4)
157 verts_off = num_verts;
158 else
159 verts_off = num_verts % 2;
160 break;
161 case GL_POLYGON:
162 if (num_verts < 3)
163 verts_off = num_verts;
164 break;
165 default:
166 assert(0);
167 return -1;
168 break;
169 }
170
171 return num_verts - verts_off;
172 }
173
174 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
175 {
176 r300ContextPtr rmesa = R300_CONTEXT(ctx);
177 void *out;
178
179 radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
180 &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
181 radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
182 out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
183 memcpy(out, elts, n_elts * 4);
184 radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
185 }
186
/**
 * \brief Emit an indexed draw: a 3D_DRAW_INDX_2 packet followed by the
 * INDX_BUFFER packet pointing at the index array uploaded by r300EmitElts().
 *
 * \param vertex_count number of 32-bit indices to draw; no-op if <= 0
 * \param type         hardware primitive type (R300_VAP_VF_CNTL__PRIM_*)
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Vertex count lives in the high 16 bits; indices are
		 * always emitted as 32-bit values. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			/* Legacy path: relocation is emitted inline inside
			 * the INDX_BUFFER packet. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
					rmesa->radeon.tcl.elt_dma_bo,
					rmesa->radeon.tcl.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			/* Kernel memory-manager path: emit the raw offset and
			 * record the relocation separately on the CS. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
221
/**
 * \brief Emit the vertex array (AOS) pointers via a 3D_LOAD_VBPNTR packet.
 *
 * \param nr     number of arrays to emit from rmesa->radeon.tcl.aos[]
 * \param offset start vertex; folded into each array's buffer offset as
 *               offset * 4 * stride (stride is counted in dwords)
 *
 * Arrays are packed two per control dword (components/stride pairs), each
 * followed by its buffer address. An odd trailing array gets its own
 * control dword. The legacy and kernel_mm paths emit the same packet
 * layout; they differ only in how relocations are recorded.
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* Packet body size: 1 count dword + 3 dwords per array pair
	 * (control + 2 pointers) + 2 dwords for an odd trailing array. */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy path: relocations are emitted inline with each
		 * buffer address. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		if (nr & 1) {
			/* Odd array count: last array gets its own control dword. */
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* Kernel memory-manager path: emit raw offsets in the packet,
		 * then record all relocations on the CS afterwards, in the
		 * same order the addresses were emitted. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
326
/**
 * \brief Emit a non-indexed draw (3D_DRAW_VBUF_2) walking the vertex
 * arrays previously set up by r300EmitAOS().
 *
 * \param vertex_count number of vertices to draw (high 16 bits of VF_CNTL)
 * \param type         hardware primitive type (R300_VAP_VF_CNTL__PRIM_*)
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
336
/**
 * \brief Render one primitive run [start, end) from the TNL vertex buffer.
 *
 * Translates the GL primitive to the hardware type, trims the vertex count
 * to whole primitives, then emits either an indexed (Elts) or sequential
 * draw. Silently returns when the primitive is unknown or empty.
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* Indexed path: DRAW_INDX_2 carries only a 16-bit count. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
382
/**
 * \brief Render the whole TNL vertex buffer with hardware rasterization.
 *
 * Updates shaders/state, emits cache flushes around the draw, then renders
 * each primitive run. Returns GL_TRUE to pass on to the next pipeline
 * stage (e.g. if vertex array setup failed), GL_FALSE when rendering
 * completed here.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Non-zero means the vertex arrays could not be emitted; let the
	 * next (software) stage handle it. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	/* Flush caches and emit the accumulated state before drawing. */
	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	/* Drop references to all vertex array DMA buffers. */
	radeonReleaseArrays(ctx, ~0);

	return GL_FALSE;
}
416
/**
 * \brief Bail out of the fallback check with R300_FALLBACK_RAST when
 * \a expr is true, warning once if fallback debugging is enabled.
 *
 * Note: the previous version had a leftover "1 ||" that forced the
 * warning regardless of RADEON_DEBUG; the flag is now honored.
 */
#define FALLBACK_IF(expr) \
	do { \
		if (expr) { \
			if (RADEON_DEBUG & DEBUG_FALLBACKS) \
				WARN_ONCE("Software fallback:%s\n", \
					  #expr); \
			return R300_FALLBACK_RAST; \
		} \
	} while(0)
426
427 static int r300Fallback(GLcontext * ctx)
428 {
429 r300ContextPtr r300 = R300_CONTEXT(ctx);
430 const unsigned back = ctx->Stencil._BackFace;
431
432 FALLBACK_IF(r300->radeon.Fallback);
433
434 struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
435 if (fp && !fp->translated) {
436 r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
437 FALLBACK_IF(fp->error);
438 }
439
440 FALLBACK_IF(ctx->RenderMode != GL_RENDER);
441
442 /* If GL_EXT_stencil_two_side is disabled, this fallback check can
443 * be removed.
444 */
445 FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
446 || ctx->Stencil.ValueMask[0] !=
447 ctx->Stencil.ValueMask[back]
448 || ctx->Stencil.WriteMask[0] !=
449 ctx->Stencil.WriteMask[back]);
450
451 if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
452 FALLBACK_IF(ctx->Point.PointSprite);
453
454 if (!r300->disable_lowimpact_fallback) {
455 FALLBACK_IF(ctx->Polygon.StippleFlag);
456 FALLBACK_IF(ctx->Multisample._Enabled);
457 FALLBACK_IF(ctx->Line.StippleFlag);
458 FALLBACK_IF(ctx->Line.SmoothFlag);
459 FALLBACK_IF(ctx->Point.SmoothFlag);
460 }
461
462 return R300_FALLBACK_NONE;
463 }
464
465 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
466 struct tnl_pipeline_stage *stage)
467 {
468 r300ContextPtr rmesa = R300_CONTEXT(ctx);
469
470 if (RADEON_DEBUG & DEBUG_PRIMS)
471 fprintf(stderr, "%s\n", __FUNCTION__);
472
473 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
474 return GL_TRUE;
475
476 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
477 return GL_TRUE;
478
479 if (!r300ValidateBuffers(ctx))
480 return GL_TRUE;
481
482 return r300RunRender(ctx, stage);
483 }
484
/**
 * \brief Pipeline entry point for full hardware TCL rendering.
 *
 * Sets the global hw_tcl_on flag from future_hw_tcl_on, and clears it
 * again when a TCL fallback or a non-native vertex program forces the
 * software path. Returns GL_TRUE to continue with the next pipeline
 * stage, GL_FALSE when rendering finished here.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Latch the requested TCL mode for this draw. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* The vertex program must fit the hardware; otherwise drop to
	 * software TCL for this draw. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
517
/* TNL pipeline stage: hardware rasterization of software-TCL vertices.
 * Only the stage name and the run callback are provided; the four NULL
 * slots are optional hooks -- presumably create/destroy/validate-style
 * callbacks, see tnl/t_context.h for the exact slot meanings. */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
526
/* TNL pipeline stage: full hardware transform, clipping and lighting.
 * Only the stage name and the run callback are provided; the four NULL
 * slots are optional hooks -- presumably create/destroy/validate-style
 * callbacks, see tnl/t_context.h for the exact slot meanings. */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};