1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the vertex
 * buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch of
 * primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache related registers are setup correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add immediate implementation back? Perhaps this is useful if there are
 * no bugs...
 */
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
78 extern int future_hw_tcl_on
;
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
83 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
85 switch (prim
& PRIM_MODE_MASK
) {
87 return R300_VAP_VF_CNTL__PRIM_POINTS
;
90 return R300_VAP_VF_CNTL__PRIM_LINES
;
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
101 case GL_TRIANGLE_STRIP
:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
104 case GL_TRIANGLE_FAN
:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
108 return R300_VAP_VF_CNTL__PRIM_QUADS
;
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
114 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
123 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
127 switch (prim
& PRIM_MODE_MASK
) {
132 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
140 verts_off
= num_verts
;
143 verts_off
= num_verts
% 3;
145 case GL_TRIANGLE_STRIP
:
147 verts_off
= num_verts
;
149 case GL_TRIANGLE_FAN
:
151 verts_off
= num_verts
;
154 verts_off
= num_verts
% 4;
158 verts_off
= num_verts
;
160 verts_off
= num_verts
% 2;
164 verts_off
= num_verts
;
172 return num_verts
- verts_off
;
175 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
177 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
180 rmesa
->state
.elt_dma_bo
= radeon_bo_open(rmesa
->radeon
.radeonScreen
->bom
,
182 RADEON_GEM_DOMAIN_GTT
, 0);
183 rmesa
->state
.elt_dma_offset
= 0;
184 radeon_bo_map(rmesa
->state
.elt_dma_bo
, 1);
185 out
= rmesa
->state
.elt_dma_bo
->ptr
+ rmesa
->state
.elt_dma_offset
;
186 memcpy(out
, elts
, n_elts
* 4);
187 radeon_bo_unmap(rmesa
->state
.elt_dma_bo
);
190 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
192 BATCH_LOCALS(&rmesa
->radeon
);
194 if (vertex_count
> 0) {
196 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
197 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
198 ((vertex_count
+ 0) << 16) |
200 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
202 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
203 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
204 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
205 OUT_BATCH_RELOC(rmesa
->state
.elt_dma_offset
,
206 rmesa
->state
.elt_dma_bo
,
207 rmesa
->state
.elt_dma_offset
,
208 RADEON_GEM_DOMAIN_GTT
, 0, 0);
209 OUT_BATCH(vertex_count
);
211 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
212 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
213 OUT_BATCH(rmesa
->state
.elt_dma_offset
);
214 OUT_BATCH(vertex_count
);
215 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
216 rmesa
->state
.elt_dma_bo
,
217 RADEON_GEM_DOMAIN_GTT
, 0, 0);
223 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
225 BATCH_LOCALS(&rmesa
->radeon
);
227 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
230 if (RADEON_DEBUG
& DEBUG_VERTS
)
231 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
235 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
239 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
240 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
241 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
242 (rmesa
->state
.aos
[i
].stride
<< 8) |
243 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
244 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
246 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
247 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
248 OUT_BATCH_RELOC(voffset
,
249 rmesa
->state
.aos
[i
].bo
,
251 RADEON_GEM_DOMAIN_GTT
,
253 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
254 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
255 OUT_BATCH_RELOC(voffset
,
256 rmesa
->state
.aos
[i
+1].bo
,
258 RADEON_GEM_DOMAIN_GTT
,
263 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
264 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
265 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
266 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
267 OUT_BATCH_RELOC(voffset
,
268 rmesa
->state
.aos
[nr
- 1].bo
,
270 RADEON_GEM_DOMAIN_GTT
,
274 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
275 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
276 (rmesa
->state
.aos
[i
].stride
<< 8) |
277 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
278 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
280 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
281 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
283 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
284 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
289 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
290 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
291 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
292 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
295 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
296 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
297 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
298 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
299 rmesa
->state
.aos
[i
+0].bo
,
300 RADEON_GEM_DOMAIN_GTT
,
302 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
303 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
304 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
305 rmesa
->state
.aos
[i
+1].bo
,
306 RADEON_GEM_DOMAIN_GTT
,
310 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
311 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
312 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
313 rmesa
->state
.aos
[nr
-1].bo
,
314 RADEON_GEM_DOMAIN_GTT
,
321 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
323 BATCH_LOCALS(&rmesa
->radeon
);
326 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
327 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
331 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
332 int start
, int end
, int prim
)
334 BATCH_LOCALS(&rmesa
->radeon
);
336 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
337 struct vertex_buffer
*vb
= &tnl
->vb
;
339 type
= r300PrimitiveType(rmesa
, prim
);
340 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
342 if (type
< 0 || num_verts
<= 0)
345 /* Make space for at least 64 dwords.
346 * This is supposed to ensure that we can get all rendering
347 * commands into a single command buffer.
349 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 64, __FUNCTION__
);
352 if (num_verts
> 65535) {
353 /* not implemented yet */
354 WARN_ONCE("Too many elts\n");
357 /* Note: The following is incorrect, but it's the best I can do
358 * without a major refactoring of how DMA memory is handled.
359 * The problem: Ensuring that both vertex arrays *and* index
360 * arrays are at the right position, and then ensuring that
361 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
364 * So why is the following incorrect? Well, it seems like
365 * allocating the index array might actually evict the vertex
368 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
369 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
370 r300FireEB(rmesa
, num_verts
, type
);
372 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
373 r300FireAOS(rmesa
, num_verts
, type
);
378 static GLboolean
r300RunRender(GLcontext
* ctx
,
379 struct tnl_pipeline_stage
*stage
)
381 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
383 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
384 struct vertex_buffer
*vb
= &tnl
->vb
;
386 if (RADEON_DEBUG
& DEBUG_PRIMS
)
387 fprintf(stderr
, "%s\n", __FUNCTION__
);
389 r300UpdateShaders(rmesa
);
390 if (r300EmitArrays(ctx
))
393 r300UpdateShaderStates(rmesa
);
395 r300EmitCacheFlush(rmesa
);
396 r300EmitState(rmesa
);
398 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
399 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
400 GLuint start
= vb
->Primitive
[i
].start
;
401 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
402 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
405 r300EmitCacheFlush(rmesa
);
407 r300ReleaseArrays(ctx
);
/* Bail out of r300Fallback() with a rasterization fallback when the
 * condition holds, optionally logging the failing expression once. */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)
422 static int r300Fallback(GLcontext
* ctx
)
424 r300ContextPtr r300
= R300_CONTEXT(ctx
);
425 /* Do we need to use new-style shaders?
426 * Also is there a better way to do this? */
427 if (r300
->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
) {
428 struct r500_fragment_program
*fp
= (struct r500_fragment_program
*)
429 (char *)ctx
->FragmentProgram
._Current
;
431 if (!fp
->translated
) {
432 r500TranslateFragmentShader(r300
, fp
);
433 FALLBACK_IF(!fp
->translated
);
437 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*)
438 (char *)ctx
->FragmentProgram
._Current
;
440 if (!fp
->translated
) {
441 r300TranslateFragmentShader(r300
, fp
);
442 FALLBACK_IF(!fp
->translated
);
447 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
449 FALLBACK_IF(ctx
->Stencil
._TestTwoSide
450 && (ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[1]
451 || ctx
->Stencil
.ValueMask
[0] !=
452 ctx
->Stencil
.ValueMask
[1]
453 || ctx
->Stencil
.WriteMask
[0] !=
454 ctx
->Stencil
.WriteMask
[1]));
456 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
457 FALLBACK_IF(ctx
->Point
.PointSprite
);
459 if (!r300
->disable_lowimpact_fallback
) {
460 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
461 FALLBACK_IF(ctx
->Multisample
._Enabled
);
462 FALLBACK_IF(ctx
->Line
.StippleFlag
);
463 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
464 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
467 return R300_FALLBACK_NONE
;
470 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
471 struct tnl_pipeline_stage
*stage
)
473 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
475 if (RADEON_DEBUG
& DEBUG_PRIMS
)
476 fprintf(stderr
, "%s\n", __FUNCTION__
);
478 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
481 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
484 return r300RunRender(ctx
, stage
);
487 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
488 struct tnl_pipeline_stage
*stage
)
490 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
491 struct r300_vertex_program
*vp
;
493 hw_tcl_on
= future_hw_tcl_on
;
495 if (RADEON_DEBUG
& DEBUG_PRIMS
)
496 fprintf(stderr
, "%s\n", __FUNCTION__
);
498 if (hw_tcl_on
== GL_FALSE
)
501 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
502 hw_tcl_on
= GL_FALSE
;
506 r300UpdateShaders(rmesa
);
508 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
509 if (vp
->native
== GL_FALSE
) {
510 hw_tcl_on
= GL_FALSE
;
514 return r300RunRender(ctx
, stage
);
517 const struct tnl_pipeline_stage _r300_render_stage
= {
518 "r300 Hardware Rasterization",
526 const struct tnl_pipeline_stage _r300_tcl_stage
= {
527 "r300 Hardware Transform, Clipping and Lighting",