23d068c339928071fdbbd4ea0e087ddd17c1bc32
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
74 #include "r300_reg.h"
75 #include "r300_tex.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
78 extern int future_hw_tcl_on;
79
80 /**
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
82 */
83 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
84 {
85 switch (prim & PRIM_MODE_MASK) {
86 case GL_POINTS:
87 return R300_VAP_VF_CNTL__PRIM_POINTS;
88 break;
89 case GL_LINES:
90 return R300_VAP_VF_CNTL__PRIM_LINES;
91 break;
92 case GL_LINE_STRIP:
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
94 break;
95 case GL_LINE_LOOP:
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
97 break;
98 case GL_TRIANGLES:
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
100 break;
101 case GL_TRIANGLE_STRIP:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
103 break;
104 case GL_TRIANGLE_FAN:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
106 break;
107 case GL_QUADS:
108 return R300_VAP_VF_CNTL__PRIM_QUADS;
109 break;
110 case GL_QUAD_STRIP:
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
112 break;
113 case GL_POLYGON:
114 return R300_VAP_VF_CNTL__PRIM_POLYGON;
115 break;
116 default:
117 assert(0);
118 return -1;
119 break;
120 }
121 }
122
123 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
124 {
125 int verts_off = 0;
126
127 switch (prim & PRIM_MODE_MASK) {
128 case GL_POINTS:
129 verts_off = 0;
130 break;
131 case GL_LINES:
132 verts_off = num_verts % 2;
133 break;
134 case GL_LINE_STRIP:
135 if (num_verts < 2)
136 verts_off = num_verts;
137 break;
138 case GL_LINE_LOOP:
139 if (num_verts < 2)
140 verts_off = num_verts;
141 break;
142 case GL_TRIANGLES:
143 verts_off = num_verts % 3;
144 break;
145 case GL_TRIANGLE_STRIP:
146 if (num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 if (num_verts < 3)
151 verts_off = num_verts;
152 break;
153 case GL_QUADS:
154 verts_off = num_verts % 4;
155 break;
156 case GL_QUAD_STRIP:
157 if (num_verts < 4)
158 verts_off = num_verts;
159 else
160 verts_off = num_verts % 2;
161 break;
162 case GL_POLYGON:
163 if (num_verts < 3)
164 verts_off = num_verts;
165 break;
166 default:
167 assert(0);
168 return -1;
169 break;
170 }
171
172 return num_verts - verts_off;
173 }
174
175 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
176 {
177 r300ContextPtr rmesa = R300_CONTEXT(ctx);
178 void *out;
179
180 radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
181 &rmesa->state.elt_dma_offset, n_elts * 4, 4);
182 radeon_bo_map(rmesa->state.elt_dma_bo, 1);
183 out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
184 memcpy(out, elts, n_elts * 4);
185 radeon_bo_unmap(rmesa->state.elt_dma_bo);
186 }
187
/**
 * \brief Emit a 3D_DRAW_INDX_2 draw using the element buffer previously
 * uploaded by r300EmitElts().
 *
 * \param rmesa        r300 context
 * \param vertex_count number of 32-bit indices to draw; no-op if <= 0
 * \param type         R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType()
 *
 * Legacy path: the buffer address is emitted inline with OUT_BATCH_RELOC.
 * Kernel-mm path: the raw offset goes into the packet and the relocation
 * is appended afterwards via radeon_cs_write_reloc.
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Indexed walk, count in the high half, 32-bit index size. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			/* R300_EB_UNK1/UNK2: purpose not fully understood,
			 * required for the packet to work (see file header
			 * note about cache registers). */
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			/* Reloc is recorded out-of-band for the new kernel
			 * memory manager. */
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
220
/**
 * \brief Emit the 3D_LOAD_VBPNTR packet describing the vertex arrays
 * (array-of-structures) for the upcoming draw.
 *
 * \param rmesa  r300 context; arrays come from rmesa->state.aos[]
 * \param nr     number of vertex arrays
 * \param offset index of the first vertex; folded into each array's start
 *               address as offset * 4 * stride (strides appear to be in
 *               dwords -- hence the * 4)
 *
 * Packet layout: one count dword, then array descriptors packed two per
 * dword (components/stride in the low and high halves), each pair
 * followed by its two addresses; a trailing odd array gets its own
 * descriptor dword and one address.  That is what
 * sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2 counts.
 *
 * Legacy path: addresses carry inline relocs (OUT_BATCH_RELOC).
 * Kernel-mm path: raw offsets are written into the packet and all
 * relocations are appended afterwards, in the same array order.
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		/* Two arrays per descriptor dword, each followed by its
		 * relocated address. */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Odd array count: last descriptor dword holds one array. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {

		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		/* Same packet body, but addresses are raw offsets... */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* ...and the relocations for every array are written after
		 * the packet, in the same order as the addresses above. */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
325
/**
 * \brief Emit a 3D_DRAW_VBUF_2 draw: \a vertex_count sequential vertices
 * pulled from the arrays set up by r300EmitAOS().
 *
 * \param type R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType().
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	/* Sequential vertex-list walk; count in the high half. */
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
335
/**
 * \brief Render one primitive run [\a start, \a end) from the current
 * tnl vertex buffer.
 *
 * Picks the indexed path (EmitElts + FireEB) when the vertex buffer has
 * an element list, the sequential path (FireAOS) otherwise.  Runs that
 * are empty after clamping to whole primitives are silently dropped, as
 * are indexed draws of more than 65535 vertices (not implemented yet).
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	BATCH_LOCALS(&rmesa->radeon);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	/* Unknown primitive or nothing drawable after clamping. */
	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
382
383 static GLboolean r300RunRender(GLcontext * ctx,
384 struct tnl_pipeline_stage *stage)
385 {
386 r300ContextPtr rmesa = R300_CONTEXT(ctx);
387 int i;
388 TNLcontext *tnl = TNL_CONTEXT(ctx);
389 struct vertex_buffer *vb = &tnl->vb;
390
391 if (RADEON_DEBUG & DEBUG_PRIMS)
392 fprintf(stderr, "%s\n", __FUNCTION__);
393
394 r300UpdateShaders(rmesa);
395 if (r300EmitArrays(ctx))
396 return GL_TRUE;
397
398 r300UpdateShaderStates(rmesa);
399
400 r300EmitCacheFlush(rmesa);
401 r300EmitState(rmesa);
402
403 for (i = 0; i < vb->PrimitiveCount; i++) {
404 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
405 GLuint start = vb->Primitive[i].start;
406 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
407 r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
408 }
409
410 r300EmitCacheFlush(rmesa);
411
412 r300ReleaseArrays(ctx);
413
414 return GL_FALSE;
415 }
416
/**
 * \brief Return R300_FALLBACK_RAST from the enclosing function when
 * \a expr is true, warning once when fallback debugging is enabled.
 *
 * Fix: the condition previously read `1 || RADEON_DEBUG & ...`, a
 * leftover debug override that made the warning unconditional and the
 * DEBUG_FALLBACKS flag dead; the override has been removed.
 */
#define FALLBACK_IF(expr)					\
	do {							\
		if (expr) {					\
			if (RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n", \
					  #expr);		\
			return R300_FALLBACK_RAST;		\
		}						\
	} while(0)
426
/**
 * \brief Decide whether the current GL state can be rasterized in
 * hardware.
 *
 * First makes sure the current fragment program is translated (r500
 * flavour for RV515 and newer, r300 otherwise), falling back if the
 * translation fails.  Then checks render mode, two-sided stencil
 * limits, point sprites and, unless low-impact fallbacks are disabled,
 * stipple/smooth/multisample state.
 *
 * \return R300_FALLBACK_RAST when software rasterization is required,
 *         R300_FALLBACK_NONE otherwise.
 *
 * Side effect: may translate the fragment program in place.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		/* (char *) intermediate cast: presumably silences an
		 * incompatible-pointer warning -- TODO confirm. */
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Feedback/select modes are not handled in hardware. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* Two-sided stencil only works when ref/mask values match on
	 * both faces. */
	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] !=
			ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] !=
			ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* "Low impact" fallbacks: visually minor features the user may
	 * choose to render (incorrectly) in hardware instead. */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
474
475 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
476 struct tnl_pipeline_stage *stage)
477 {
478 r300ContextPtr rmesa = R300_CONTEXT(ctx);
479
480 if (RADEON_DEBUG & DEBUG_PRIMS)
481 fprintf(stderr, "%s\n", __FUNCTION__);
482
483 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
484 return GL_TRUE;
485
486 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
487 return GL_TRUE;
488
489 return r300RunRender(ctx, stage);
490 }
491
/**
 * \brief Pipeline stage callback: render with hardware TCL.
 *
 * \return GL_TRUE to pass the batch to the next stage, GL_FALSE once
 *         the hardware rendered it.
 *
 * Side effect: maintains the hw_tcl_on global -- it is refreshed from
 * future_hw_tcl_on on every call and cleared when a TCL fallback is hit
 * or the vertex program cannot run natively.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Pick up any deferred TCL on/off request. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* A vertex program the hardware cannot execute forces software
	 * TCL. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
524
/**
 * \brief TNL pipeline stage: hardware rasterization (software TCL path).
 *
 * Only the name and the run callback are provided; the remaining hooks
 * are NULL.  NOTE(review): slot meanings follow struct
 * tnl_pipeline_stage -- confirm field order against tnl/t_context.h.
 */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
533
/**
 * \brief TNL pipeline stage: full hardware TCL path.
 *
 * Only the name and the run callback are provided; the remaining hooks
 * are NULL.  NOTE(review): slot meanings follow struct
 * tnl_pipeline_stage -- confirm field order against tnl/t_context.h.
 */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};