/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the
 * vertex buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch
 * of primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL, still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache related registers are set up correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add immediate implementation back? Perhaps this is useful if there are
 * rendering backends that copy from the VB.
 */
#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog_common.h"
/**
 * \brief Convert an OpenGL primitive type into an R300 primitive type.
 */
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		return R300_VAP_VF_CNTL__PRIM_POINTS;
	case GL_LINES:
		return R300_VAP_VF_CNTL__PRIM_LINES;
	case GL_LINE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
	case GL_LINE_LOOP:
		return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
	case GL_TRIANGLES:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
	case GL_TRIANGLE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
	case GL_TRIANGLE_FAN:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
	case GL_QUADS:
		return R300_VAP_VF_CNTL__PRIM_QUADS;
	case GL_QUAD_STRIP:
		return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
	case GL_POLYGON:
		return R300_VAP_VF_CNTL__PRIM_POLYGON;
	default:
		assert(0);
		return -1;
	}
}
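
/**
 * \brief Truncate a vertex count so that it covers only complete primitives.
 *
 * Trailing vertices that cannot form a whole primitive (e.g. an odd vertex
 * for GL_LINES) are dropped, and counts too small for the primitive type
 * (e.g. two vertices for a triangle strip) collapse to zero.
 */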
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
	int verts_off = 0;

	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		verts_off = 0;
		break;
	case GL_LINES:
		verts_off = num_verts % 2;
		break;
	case GL_LINE_STRIP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_LINE_LOOP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_TRIANGLES:
		verts_off = num_verts % 3;
		break;
	case GL_TRIANGLE_STRIP:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_TRIANGLE_FAN:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_QUADS:
		verts_off = num_verts % 4;
		break;
	case GL_QUAD_STRIP:
		if (num_verts < 4)
			verts_off = num_verts;
		else
			verts_off = num_verts % 2;
		break;
	case GL_POLYGON:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	default:
		assert(0);
		return -1;
	}

	return num_verts - verts_off;
}
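
/**
 * \brief Copy the 32-bit element (index) array into a newly allocated GART
 * DMA region, from which the INDX_BUFFER packet will read it.
 */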
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
			     &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
	radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
	out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
}
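
/**
 * \brief Emit a DRAW_INDX_2 packet followed by the INDX_BUFFER packet that
 * points the hardware at the element buffer uploaded by r300EmitElts().
 */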
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
					rmesa->radeon.tcl.elt_dma_bo,
					rmesa->radeon.tcl.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
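
/**
 * \brief Emit the 3D_LOAD_VBPNTR packet describing the vertex arrays:
 * component count, stride and (relocated) offset of each array, packed two
 * arrays per control dword. The legacy and kernel memory manager paths
 * differ only in how the buffer relocations are written.
 */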
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);

	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		BEGIN_BATCH(sz + 2 + (nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i + 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		BEGIN_BATCH(sz + 2 + (nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}

		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i + 0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i + 1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[nr - 1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}
}
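
/**
 * \brief Fire a non-indexed draw that walks the vertex arrays set up by
 * r300EmitAOS() in list order.
 */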
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
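
/**
 * \brief Render the primitive run [start, end) from the TNL vertex buffer,
 * using the indexed path when an element list is present.
 */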
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * consecutively.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
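
/**
 * \brief Render the whole TNL vertex buffer with hardware.
 *
 * Returns GL_FALSE when rendering completed here, GL_TRUE to hand the batch
 * on to the next (software) pipeline stage.
 */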
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	radeonReleaseArrays(ctx, ~0);

	return GL_FALSE;
}
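
/**
 * \brief Return R300_FALLBACK_RAST from the enclosing function when \a expr
 * is true, warning once about the fallback reason.
 */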
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)
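
/**
 * \brief Check for rasterization state the hardware cannot handle and decide
 * whether a software fallback is required.
 */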
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	const unsigned back = ctx->Stencil._BackFace;

	FALLBACK_IF(r300->radeon.Fallback);

	struct r300_fragment_program *fp =
		(struct r300_fragment_program *) ctx->FragmentProgram._Current;
	if (fp && !fp->translated) {
		r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
		FALLBACK_IF(fp->error);
	}

	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	FALLBACK_IF(ctx->Stencil.Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
		|| ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
		|| ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
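
/**
 * \brief Pipeline stage entry point for hardware rasterization of vertices
 * that were transformed in software.
 */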
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
				     struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
		return GL_TRUE;

	if (rmesa->options.hw_tcl_enabled == GL_FALSE)
		return GL_TRUE;

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	return r300RunRender(ctx, stage);
}
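
/**
 * \brief Pipeline stage entry point for full hardware TCL. Falls back by
 * clearing hw_tcl_enabled when a TCL-level fallback or a non-native vertex
 * program is detected.
 */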
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->options.hw_tcl_enabled == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		rmesa->options.hw_tcl_enabled = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		rmesa->options.hw_tcl_enabled = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
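
/**
 * \brief TNL pipeline stage descriptors; only the run callback is needed,
 * all other hooks stay NULL.
 */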
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};

const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};