393bf754ff209e9836038df8bdb13daf97b165c9
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
72 #include "r300_reg.h"
73 #include "r300_tex.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog.h"
76 extern int future_hw_tcl_on;
77
78 /**
79 * \brief Convert a OpenGL primitive type into a R300 primitive type.
80 */
81 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
82 {
83 switch (prim & PRIM_MODE_MASK) {
84 case GL_POINTS:
85 return R300_VAP_VF_CNTL__PRIM_POINTS;
86 break;
87 case GL_LINES:
88 return R300_VAP_VF_CNTL__PRIM_LINES;
89 break;
90 case GL_LINE_STRIP:
91 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
92 break;
93 case GL_LINE_LOOP:
94 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
95 break;
96 case GL_TRIANGLES:
97 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
98 break;
99 case GL_TRIANGLE_STRIP:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
101 break;
102 case GL_TRIANGLE_FAN:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
104 break;
105 case GL_QUADS:
106 return R300_VAP_VF_CNTL__PRIM_QUADS;
107 break;
108 case GL_QUAD_STRIP:
109 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
110 break;
111 case GL_POLYGON:
112 return R300_VAP_VF_CNTL__PRIM_POLYGON;
113 break;
114 default:
115 assert(0);
116 return -1;
117 break;
118 }
119 }
120
121 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
122 {
123 int verts_off = 0;
124
125 switch (prim & PRIM_MODE_MASK) {
126 case GL_POINTS:
127 verts_off = 0;
128 break;
129 case GL_LINES:
130 verts_off = num_verts % 2;
131 break;
132 case GL_LINE_STRIP:
133 if (num_verts < 2)
134 verts_off = num_verts;
135 break;
136 case GL_LINE_LOOP:
137 if (num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 verts_off = num_verts % 3;
142 break;
143 case GL_TRIANGLE_STRIP:
144 if (num_verts < 3)
145 verts_off = num_verts;
146 break;
147 case GL_TRIANGLE_FAN:
148 if (num_verts < 3)
149 verts_off = num_verts;
150 break;
151 case GL_QUADS:
152 verts_off = num_verts % 4;
153 break;
154 case GL_QUAD_STRIP:
155 if (num_verts < 4)
156 verts_off = num_verts;
157 else
158 verts_off = num_verts % 2;
159 break;
160 case GL_POLYGON:
161 if (num_verts < 3)
162 verts_off = num_verts;
163 break;
164 default:
165 assert(0);
166 return -1;
167 break;
168 }
169
170 return num_verts - verts_off;
171 }
172
/**
 * Copy the index (element) array into a freshly allocated DMA region so the
 * hardware can fetch it during an indexed draw.
 *
 * Indices are treated as 4 bytes each (32-bit), matching the
 * R300_VAP_VF_CNTL__INDEX_SIZE_32bit flag emitted by r300FireEB().
 * The resulting buffer object and offset are stored in
 * rmesa->state.elt_dma_bo / elt_dma_offset for r300FireEB() to reference.
 */
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	/* n_elts 32-bit indices, 4-byte aligned. */
	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
			     &rmesa->state.elt_dma_offset, n_elts * 4, 4);
	radeon_bo_map(rmesa->state.elt_dma_bo, 1); /* 1 = map for write */
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}
185
/**
 * Emit an indexed draw: a 3D_DRAW_INDX_2 packet describing the primitive,
 * followed by an INDX_BUFFER packet pointing at the element buffer that
 * r300EmitElts() set up in rmesa->state.elt_dma_bo/elt_dma_offset.
 *
 * \param vertex_count  number of indices to draw; no-op when zero.
 * \param type          R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType().
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Walk indices, 32-bit each; count lives in bits 16+. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			/* Legacy (non-KMS) path: the buffer address is
			 * patched in-line via OUT_BATCH_RELOC. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			/* KMS path: emit the raw offset, then register the
			 * relocation with the command stream afterwards. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
218
/**
 * Emit the LOAD_VBPNTR packet describing the vertex arrays (arrays of
 * structures) for this draw.
 *
 * Arrays are packed two per control dword (components/stride for a pair),
 * followed by one address dword per array. The two branches emit the same
 * packet layout; they differ only in how buffer addresses are relocated:
 * the legacy path patches addresses in-line (OUT_BATCH_RELOC), while the
 * KMS path emits raw offsets and appends all relocations after the packet.
 *
 * \param nr      number of vertex arrays in rmesa->state.aos[].
 * \param offset  starting vertex index; scaled by 4 * stride to get a byte
 *                offset (strides are in dwords).
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* Packet payload size: 1 count dword, 3 dwords per array pair
	 * (control + two addresses), 2 dwords for a trailing odd array. */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy path: relocations are patched in-line. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		/* Emit arrays in pairs: one control dword, two addresses. */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Trailing odd array: control dword then one address. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* KMS path: emit raw offsets in the packet, then register
		 * every relocation with the CS in the same order. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* Relocations, one per array, mirroring the address dwords
		 * emitted above. (voffset is recomputed but unused here.) */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
323
/**
 * Emit a non-indexed draw (3D_DRAW_VBUF_2): walk the vertex arrays set up
 * by r300EmitAOS() in list order.
 *
 * \param vertex_count  number of vertices to draw (packed into bits 16+).
 * \param type          R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType().
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
333
/**
 * Render one TNL primitive run [start, end) with the given GL primitive
 * mode: translate the mode, trim the vertex count to whole primitives,
 * then emit the vertex arrays plus either an indexed (EB) or sequential
 * (AOS) draw, and commit the batch.
 *
 * Silently skips runs with an unknown mode or too few vertices.
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	BATCH_LOCALS(&rmesa->radeon);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	/* Unknown primitive mode or not enough vertices to draw anything. */
	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* Indexed draw: vertex_count is a 16-bit field in the
		 * DRAW_INDX_2 packet, so larger runs are unsupported. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
380
/**
 * Render the whole TNL vertex buffer with the hardware rasterizer.
 *
 * Flow: update shaders, upload vertex arrays (r300EmitArrays), emit shader
 * state and a cache flush, then render each primitive run, flush again and
 * release the arrays.
 *
 * \return GL_TRUE to fall through to the next pipeline stage (software
 *         rasterization) when array setup fails, GL_FALSE when the
 *         primitives were rendered here.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Non-zero means array setup failed: pass on to software. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	r300EmitState(rmesa);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	/* GL_FALSE = pipeline finished; nothing left for later stages. */
	return GL_FALSE;
}
414
/* Bail out of r300Fallback() with R300_FALLBACK_RAST when `expr` holds,
 * warning once per call site.
 * NOTE(review): the `1 ||` makes the warning unconditional, ignoring
 * DEBUG_FALLBACKS — presumably a deliberate leftover so fallbacks are
 * always visible; confirm before removing. */
#define FALLBACK_IF(expr) \
	do { \
		if (expr) { \
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
				WARN_ONCE("Software fallback:%s\n", \
					  #expr); \
			return R300_FALLBACK_RAST; \
		} \
	} while(0)
424
/**
 * Decide whether the current GL state can be handled by the hardware.
 *
 * Checks, in order: fragment program translatability (r500 vs r300 path by
 * chip family), render mode, two-sided stencil with differing parameters,
 * point sprites, and — unless disabled — several low-impact features
 * (stipple, multisample, smooth lines/points).
 *
 * \return R300_FALLBACK_NONE when hardware can render, or
 *         R300_FALLBACK_RAST (via FALLBACK_IF) to request software
 *         rasterization.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		/* RV515 and newer use the r500 fragment program path. */
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
		    (char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				/* Translate lazily; fall back if it fails. */
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
		    (char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Feedback/select modes are not hardware-accelerated. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* Two-sided stencil only works when both faces share ref/masks. */
	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] !=
			ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] !=
			ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* Low-impact fallbacks can be disabled via driconf. */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
472
473 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
474 struct tnl_pipeline_stage *stage)
475 {
476 r300ContextPtr rmesa = R300_CONTEXT(ctx);
477
478 if (RADEON_DEBUG & DEBUG_PRIMS)
479 fprintf(stderr, "%s\n", __FUNCTION__);
480
481 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
482 return GL_TRUE;
483
484 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
485 return GL_TRUE;
486
487 return r300RunRender(ctx, stage);
488 }
489
/**
 * Pipeline hook for the hardware-TCL path.
 *
 * Sets the global `hw_tcl_on` from `future_hw_tcl_on`, then bails out
 * (returning GL_TRUE so later stages run) when TCL is disabled, a fallback
 * is required, buffers fail validation, or the vertex program is not
 * native. On fallback/non-native paths it also clears `hw_tcl_on` so the
 * rest of the pipeline knows TCL is off.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Latch the TCL enable chosen for this frame. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* Vertex programs the hardware can't run natively force the
	 * software-TCL path. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
522
/* TNL pipeline stage for hardware rasterization with software TCL.
 * Positional init: name first, unused stage hooks as NULL, run callback
 * last (field names per tnl/t_context.h — confirm against that header). */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
531
/* TNL pipeline stage for full hardware TCL; mirrors _r300_render_stage
 * but dispatches through r300RunTCLRender. */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};