Merge remote branch 'main/master' into radeon-rewrite
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL, we still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
72 #include "r300_reg.h"
73 #include "r300_tex.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog.h"
76 extern int future_hw_tcl_on;
77
78 /**
79 * \brief Convert a OpenGL primitive type into a R300 primitive type.
80 */
81 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
82 {
83 switch (prim & PRIM_MODE_MASK) {
84 case GL_POINTS:
85 return R300_VAP_VF_CNTL__PRIM_POINTS;
86 break;
87 case GL_LINES:
88 return R300_VAP_VF_CNTL__PRIM_LINES;
89 break;
90 case GL_LINE_STRIP:
91 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
92 break;
93 case GL_LINE_LOOP:
94 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
95 break;
96 case GL_TRIANGLES:
97 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
98 break;
99 case GL_TRIANGLE_STRIP:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
101 break;
102 case GL_TRIANGLE_FAN:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
104 break;
105 case GL_QUADS:
106 return R300_VAP_VF_CNTL__PRIM_QUADS;
107 break;
108 case GL_QUAD_STRIP:
109 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
110 break;
111 case GL_POLYGON:
112 return R300_VAP_VF_CNTL__PRIM_POLYGON;
113 break;
114 default:
115 assert(0);
116 return -1;
117 break;
118 }
119 }
120
121 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
122 {
123 int verts_off = 0;
124
125 switch (prim & PRIM_MODE_MASK) {
126 case GL_POINTS:
127 verts_off = 0;
128 break;
129 case GL_LINES:
130 verts_off = num_verts % 2;
131 break;
132 case GL_LINE_STRIP:
133 if (num_verts < 2)
134 verts_off = num_verts;
135 break;
136 case GL_LINE_LOOP:
137 if (num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 verts_off = num_verts % 3;
142 break;
143 case GL_TRIANGLE_STRIP:
144 if (num_verts < 3)
145 verts_off = num_verts;
146 break;
147 case GL_TRIANGLE_FAN:
148 if (num_verts < 3)
149 verts_off = num_verts;
150 break;
151 case GL_QUADS:
152 verts_off = num_verts % 4;
153 break;
154 case GL_QUAD_STRIP:
155 if (num_verts < 4)
156 verts_off = num_verts;
157 else
158 verts_off = num_verts % 2;
159 break;
160 case GL_POLYGON:
161 if (num_verts < 3)
162 verts_off = num_verts;
163 break;
164 default:
165 assert(0);
166 return -1;
167 break;
168 }
169
170 return num_verts - verts_off;
171 }
172
/**
 * Upload an element (index) array into a freshly allocated DMA region so
 * the hardware can fetch it as an index buffer.
 *
 * The resulting buffer object and offset are stored in
 * rmesa->state.elt_dma_bo / elt_dma_offset for r300FireEB() to reference.
 *
 * \param ctx     GL context.
 * \param elts    host pointer to the indices; the copy below moves
 *                n_elts * 4 bytes, i.e. indices are assumed 32-bit wide.
 * \param n_elts  number of indices to upload.
 */
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	/* n_elts * 4 bytes, 4-byte aligned. */
	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
			     &rmesa->state.elt_dma_offset, n_elts * 4, 4);
	/* Map for write, copy the indices in, unmap again. */
	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}
185
/**
 * Emit the draw command for an indexed primitive.
 *
 * Emits a 3D_DRAW_INDX_2 packet describing the primitive, followed by an
 * INDX_BUFFER packet pointing the hardware at the index buffer previously
 * uploaded by r300EmitElts() (rmesa->state.elt_dma_bo/offset).
 *
 * \param vertex_count  number of 32-bit indices to draw; no-op when <= 0.
 * \param type          R300_VAP_VF_CNTL__PRIM_* primitive type.
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Count lives in the upper 16 bits of VAP_VF_CNTL. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		/* Both branches emit the same INDX_BUFFER packet; they
		 * differ only in how the buffer relocation is recorded:
		 * inline (legacy memory manager) vs. appended to the
		 * command stream (kernel memory manager). */
		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
220
/**
 * Emit a 3D_LOAD_VBPNTR packet describing the currently bound vertex
 * arrays (AOS = arrays of structures) in rmesa->state.aos.
 *
 * Array descriptors are packed two per dword (components + stride for a
 * pair of arrays), each followed by that array's buffer offset.
 *
 * \param nr      number of arrays in rmesa->state.aos to emit.
 * \param offset  index of the first vertex; converted to a byte offset as
 *                offset * 4 * stride (strides appear to be in dwords —
 *                TODO confirm against r300EmitArrays).
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* Packet body size: 1 dword for the array count, 3 dwords per
	 * array pair (descriptor + two offsets), 2 for a trailing odd
	 * array (descriptor + one offset). */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy memory manager: relocations are emitted inline
		 * with each offset via OUT_BATCH_RELOC. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			/* One descriptor dword covers arrays i and i+1. */
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Odd array count: last descriptor dword carries one array. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* Kernel memory manager: offsets go into the packet as-is,
		 * then the relocations are appended afterwards in the same
		 * array order. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* Record the relocations — one per array, mirroring the
		 * offsets written above. */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
325
/**
 * Emit the draw command for a non-indexed primitive: the hardware walks
 * the vertex arrays set up by r300EmitAOS() in order.
 *
 * \param vertex_count  number of vertices to draw (upper 16 bits of the
 *                      VAP_VF_CNTL dword).
 * \param type          R300_VAP_VF_CNTL__PRIM_* primitive type.
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
335
/**
 * Render one primitive run [start, end) from the TNL vertex buffer,
 * taking the indexed path when vb->Elts is set.
 *
 * \param start  index of the first vertex of the run.
 * \param end    one past the last vertex of the run.
 * \param prim   TNL primitive flags (mode in the PRIM_MODE_MASK bits).
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	/* Unknown primitive mode, or too few vertices to form one. */
	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* The draw packet's count field is 16 bits wide (see the
		 * << 16 in r300FireEB), so larger runs can't be emitted. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
381
/**
 * Render the current TNL vertex buffer in hardware.
 *
 * Updates shaders/state, emits them, then draws each primitive run in
 * the vertex buffer via r300RunRenderPrimitive().
 *
 * \return GL_TRUE to pass the batch on to the next (software) pipeline
 *         stage, GL_FALSE when the hardware has rendered everything.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Non-zero return means the vertex arrays could not be set up;
	 * fall through to software rasterization. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	return GL_FALSE;
}
415
/**
 * Return R300_FALLBACK_RAST from the enclosing function when \a expr is
 * true, emitting a one-time warning when fallback debugging is enabled.
 *
 * The previous version had a leftover "1 ||" that forced the warning
 * regardless of RADEON_DEBUG; the debug-flag check is now honored.
 */
#define FALLBACK_IF(expr)					\
	do {							\
		if (expr) {					\
			if (RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n", \
					  #expr);		\
			return R300_FALLBACK_RAST;		\
		}						\
	} while (0)
425
/**
 * Check for states the hardware rasterizer cannot handle.
 *
 * Each FALLBACK_IF returns R300_FALLBACK_RAST from this function when its
 * condition holds.
 *
 * \return R300_FALLBACK_RAST when software rasterization is required,
 *         R300_FALLBACK_NONE otherwise.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	const unsigned back = ctx->Stencil._BackFace;

	FALLBACK_IF(r300->radeon.Fallback);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		/* RV515 and newer take the r500 fragment program path;
		 * fall back if the program cannot be translated. */
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Selection/feedback modes are not hardware-accelerated. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* If GL_EXT_stencil_two_side is disabled, this fallback check can
	 * be removed.
	 */
	FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
		    || ctx->Stencil.ValueMask[0] !=
		    ctx->Stencil.ValueMask[back]
		    || ctx->Stencil.WriteMask[0] !=
		    ctx->Stencil.WriteMask[back]);

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* Low-impact fallbacks: features the user may opt to render
	 * incorrectly in hardware instead (disable_lowimpact_fallback). */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
478
479 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
480 struct tnl_pipeline_stage *stage)
481 {
482 r300ContextPtr rmesa = R300_CONTEXT(ctx);
483
484 if (RADEON_DEBUG & DEBUG_PRIMS)
485 fprintf(stderr, "%s\n", __FUNCTION__);
486
487 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
488 return GL_TRUE;
489
490 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
491 return GL_TRUE;
492
493 return r300RunRender(ctx, stage);
494 }
495
/**
 * Pipeline entry point for fully hardware-accelerated rendering
 * (hardware TCL + rasterization).
 *
 * Side effect: updates the global hw_tcl_on from future_hw_tcl_on, and
 * clears it when a TCL fallback is required or the vertex program is not
 * native.
 *
 * \return GL_TRUE to hand the batch to the next (software TCL) stage,
 *         GL_FALSE when rendering is done.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* The vertex program must run natively on the hardware; otherwise
	 * fall back to software TCL for this batch. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
528
/**
 * TNL pipeline stage used when transformation is done in software and only
 * rasterization runs in hardware. The final member is the run callback;
 * the NULL entries are unused stage hooks.
 */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
537
/**
 * TNL pipeline stage for the fully hardware-accelerated path (hardware
 * TCL). The final member is the run callback; the NULL entries are unused
 * stage hooks.
 */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};