radeon: make DRI1 work with new CS mechanism
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the
 * vertex buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch
 * of primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache-related registers are set up correctly, but
 * this obviously does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add the immediate implementation back? Perhaps this is useful if
 * there are no bugs...
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog.h"

extern int future_hw_tcl_on;

/**
 * \brief Convert an OpenGL primitive type into an R300 primitive type.
 */
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		return R300_VAP_VF_CNTL__PRIM_POINTS;
	case GL_LINES:
		return R300_VAP_VF_CNTL__PRIM_LINES;
	case GL_LINE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
	case GL_LINE_LOOP:
		return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
	case GL_TRIANGLES:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
	case GL_TRIANGLE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
	case GL_TRIANGLE_FAN:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
	case GL_QUADS:
		return R300_VAP_VF_CNTL__PRIM_QUADS;
	case GL_QUAD_STRIP:
		return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
	case GL_POLYGON:
		return R300_VAP_VF_CNTL__PRIM_POLYGON;
	default:
		assert(0);
		return -1;
	}
}

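/**
 * \brief Clamp a vertex count to a whole number of primitives.
 *
 * Returns the largest count <= num_verts that forms complete primitives of
 * the given type (e.g. a multiple of 3 for GL_TRIANGLES), or -1 for an
 * unknown primitive type. Incomplete trailing vertices are dropped.
 */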
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
	int verts_off = 0;

	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		verts_off = 0;
		break;
	case GL_LINES:
		verts_off = num_verts % 2;
		break;
	case GL_LINE_STRIP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_LINE_LOOP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_TRIANGLES:
		verts_off = num_verts % 3;
		break;
	case GL_TRIANGLE_STRIP:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_TRIANGLE_FAN:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_QUADS:
		verts_off = num_verts % 4;
		break;
	case GL_QUAD_STRIP:
		if (num_verts < 4)
			verts_off = num_verts;
		else
			verts_off = num_verts % 2;
		break;
	case GL_POLYGON:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	default:
		assert(0);
		return -1;
	}

	return num_verts - verts_off;
}

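/**
 * \brief Upload the index (element) array into a freshly allocated GTT
 * buffer object so the hardware can fetch it.
 *
 * Indices are assumed to be 32 bits wide, hence n_elts * 4 bytes.
 */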
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
						 0, n_elts * 4, 4,
						 RADEON_GEM_DOMAIN_GTT, 0);
	rmesa->state.elt_dma_offset = 0;
	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}

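/**
 * \brief Fire an indexed draw: a 3D_DRAW_INDX_2 packet followed by an
 * INDX_BUFFER packet pointing at the element buffer uploaded above.
 *
 * On the old DRI1 path the buffer relocation is emitted inline via
 * OUT_BATCH_RELOC; with the new kernel memory manager (kernel_mm) the raw
 * offset is written into the packet and the relocation is recorded
 * separately through radeon_cs_write_reloc.
 */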
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(rmesa);

	if (vertex_count > 0) {
		BEGIN_BATCH(8);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  (vertex_count << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      0,
					      rmesa->state.elt_dma_bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}

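/**
 * \brief Emit the vertex arrays via a 3D_LOAD_VBPNTR packet.
 *
 * The packet is sized as one count dword, plus 3 dwords per pair of arrays
 * (a packed components/stride dword and two pointers) and 2 dwords for a
 * trailing unpaired array: sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2.
 *
 * As in r300FireEB, relocations are emitted inline on DRI1; with kernel_mm
 * the raw offsets are written first and all relocations are recorded
 * afterwards through radeon_cs_write_reloc.
 */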
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(rmesa);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);

	BEGIN_BATCH(sz + 2);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
	OUT_BATCH(nr);

	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
			    offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
			    offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i + 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
			    offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
	} else {
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
			    offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
			    offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
			    offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}

		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
			    offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.aos[i + 0].bo,
					      voffset,
					      rmesa->state.aos[i + 0].bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
			    offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.aos[i + 1].bo,
					      voffset,
					      rmesa->state.aos[i + 1].bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
			    offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.aos[nr - 1].bo,
					      voffset,
					      rmesa->state.aos[nr - 1].bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
	}
	END_BATCH();
}

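/**
 * \brief Fire a non-indexed draw: a single 3D_DRAW_VBUF_2 packet that walks
 * the vertex arrays in list order.
 */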
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(rmesa);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST |
		  (vertex_count << 16) | type);
	END_BATCH();
}

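/**
 * \brief Render a single primitive: translate the GL primitive type, clamp
 * the vertex count, emit the vertex arrays and fire either an indexed (EB)
 * or sequential (AOS) draw.
 */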
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	BATCH_LOCALS(rmesa);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are
		 * emitted at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}

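/**
 * \brief Render the whole TNL vertex buffer with hardware rasterization.
 *
 * Emits shader and driver state, renders every primitive in the buffer, and
 * returns GL_FALSE ("pipeline finished") on success, or GL_TRUE to pass on
 * to software rasterization if the vertex arrays cannot be emitted.
 */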
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	r300EmitState(rmesa);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	return GL_FALSE;
}

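/**
 * \brief Bail out of r300Fallback() with R300_FALLBACK_RAST when the given
 * condition holds, warning once per condition. Note the "1 ||" keeps the
 * warning enabled regardless of RADEON_DEBUG.
 */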
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)

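/**
 * \brief Check for state the hardware cannot handle.
 *
 * Translates the current fragment program first (R500 or R300 flavor,
 * depending on the chip family), then tests the remaining fallback
 * conditions. Returns an R300_FALLBACK_* level; R300_FALLBACK_NONE means
 * the hardware can render the current state.
 */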
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);

	/* Do we need to use new-style shaders?
	 * Also, is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		struct r500_fragment_program *fp =
		    (struct r500_fragment_program *)(char *)
		    ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp =
		    (struct r300_fragment_program *)(char *)
		    ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}

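/**
 * \brief Run callback for the non-TCL render stage.
 *
 * Returns GL_TRUE (pass on to software rasterization) if a rasterization
 * fallback is required or the chip is not flagged as TCL-capable; otherwise
 * renders via r300RunRender.
 */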
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
				     struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
		return GL_TRUE;

	if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
		return GL_TRUE;

	return r300RunRender(ctx, stage);
}

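/**
 * \brief Run callback for the hardware-TCL render stage.
 *
 * Disables hardware TCL (hw_tcl_on) and passes on to the next stage when a
 * TCL fallback is needed or the current vertex program is not native;
 * otherwise renders via r300RunRender.
 */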
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	r300UpdateShaders(rmesa);

	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}

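/* The TNL pipeline stages exported by this file. Only the stage name and
 * the run callback are provided; the remaining tnl_pipeline_stage hooks are
 * left NULL.
 */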
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};

const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};