src/mesa/drivers/dri/r300/r300_render.c

   1 /**************************************************************************
   2
   3 Copyright (C) 2004 Nicolai Haehnle.
   4
   5 All Rights Reserved.
   6
   7 Permission is hereby granted, free of charge, to any person obtaining a
   8 copy of this software and associated documentation files (the "Software"),
   9 to deal in the Software without restriction, including without limitation
  10 on the rights to use, copy, modify, merge, publish, distribute, sub
  11 license, and/or sell copies of the Software, and to permit persons to whom
  12 the Software is furnished to do so, subject to the following conditions:
  13
  14 The above copyright notice and this permission notice (including the next
  15 paragraph) shall be included in all copies or substantial portions of the
  16 Software.
  17
  18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  24 USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26 **************************************************************************/
  27
  28 /**
  29  * \file
  30  *
  31  * \brief R300 Render (Vertex Buffer Implementation)
  32  *
  33  * The immediate implementation has been removed from CVS in favor of the vertex
  34  * buffer implementation.
  35  *
  36  * The render functions are called by the pipeline manager to render a batch of
  37  * primitives. They return TRUE to pass on to the next stage (i.e. software
  38  * rasterization) or FALSE to indicate that the pipeline has finished after
  39  * rendering something.
  40  *
  41  * When falling back to software TCL still attempt to use hardware
  42  * rasterization.
  43  *
  44  * I am not sure that the cache related registers are setup correctly, but
  45  * obviously this does work... Further investigation is needed.
  46  *
  47  * \author Nicolai Haehnle <prefect_@gmx.net>
  48  *
  49  * \todo Add immediate implementation back? Perhaps this is useful if there are
  50  * no bugs...
  51  */
  52
  53 #include "main/glheader.h"
  54 #include "main/state.h"
  55 #include "main/imports.h"
  56 #include "main/enums.h"
  57 #include "main/macros.h"
  58 #include "main/context.h"
  59 #include "main/dd.h"
  60 #include "main/simple_list.h"
  61 #include "main/api_arrayelt.h"
  62 #include "swrast/swrast.h"
  63 #include "swrast_setup/swrast_setup.h"
  64 #include "vbo/vbo.h"
  65 #include "tnl/tnl.h"
  66 #include "tnl/t_vp_build.h"
  67 #include "radeon_reg.h"
  68 #include "radeon_macros.h"
  69 #include "radeon_ioctl.h"
  70 #include "radeon_state.h"
  71 #include "r300_context.h"
  72 #include "r300_ioctl.h"
  73 #include "r300_state.h"
  74 #include "r300_reg.h"
  75 #include "r300_tex.h"
  76 #include "r300_emit.h"
  77 #include "r300_fragprog.h"
  78 extern int future_hw_tcl_on;
  79
  80 /**
  81  * \brief Convert a OpenGL primitive type into a R300 primitive type.
  82  */
  83 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
  84 {
  85         switch (prim & PRIM_MODE_MASK) {
  86         case GL_POINTS:
  87                 return R300_VAP_VF_CNTL__PRIM_POINTS;
  88                 break;
  89         case GL_LINES:
  90                 return R300_VAP_VF_CNTL__PRIM_LINES;
  91                 break;
  92         case GL_LINE_STRIP:
  93                 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
  94                 break;
  95         case GL_LINE_LOOP:
  96                 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
  97                 break;
  98         case GL_TRIANGLES:
  99                 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
 100                 break;
 101         case GL_TRIANGLE_STRIP:
 102                 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
 103                 break;
 104         case GL_TRIANGLE_FAN:
 105                 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
 106                 break;
 107         case GL_QUADS:
 108                 return R300_VAP_VF_CNTL__PRIM_QUADS;
 109                 break;
 110         case GL_QUAD_STRIP:
 111                 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
 112                 break;
 113         case GL_POLYGON:
 114                 return R300_VAP_VF_CNTL__PRIM_POLYGON;
 115                 break;
 116         default:
 117                 assert(0);
 118                 return -1;
 119                 break;
 120         }
 121 }
 122
 123 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
 124 {
 125         int verts_off = 0;
 126
 127         switch (prim & PRIM_MODE_MASK) {
 128         case GL_POINTS:
 129                 verts_off = 0;
 130                 break;
 131         case GL_LINES:
 132                 verts_off = num_verts % 2;
 133                 break;
 134         case GL_LINE_STRIP:
 135                 if (num_verts < 2)
 136                         verts_off = num_verts;
 137                 break;
 138         case GL_LINE_LOOP:
 139                 if (num_verts < 2)
 140                         verts_off = num_verts;
 141                 break;
 142         case GL_TRIANGLES:
 143                 verts_off = num_verts % 3;
 144                 break;
 145         case GL_TRIANGLE_STRIP:
 146                 if (num_verts < 3)
 147                         verts_off = num_verts;
 148                 break;
 149         case GL_TRIANGLE_FAN:
 150                 if (num_verts < 3)
 151                         verts_off = num_verts;
 152                 break;
 153         case GL_QUADS:
 154                 verts_off = num_verts % 4;
 155                 break;
 156         case GL_QUAD_STRIP:
 157                 if (num_verts < 4)
 158                         verts_off = num_verts;
 159                 else
 160                         verts_off = num_verts % 2;
 161                 break;
 162         case GL_POLYGON:
 163                 if (num_verts < 3)
 164                         verts_off = num_verts;
 165                 break;
 166         default:
 167                 assert(0);
 168                 return -1;
 169                 break;
 170         }
 171
 172         return num_verts - verts_off;
 173 }
 174
 175 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
 176 {
 177         r300ContextPtr rmesa = R300_CONTEXT(ctx);
 178         void *out;
 179
 180         rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
 181                                              0, n_elts * 4, 4,
 182                                              RADEON_GEM_DOMAIN_GTT);
 183     rmesa->state.elt_dma_offset = 0;
 184     radeon_bo_map(rmesa->state.elt_dma_bo, 1);
 185         out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
 186         memcpy(out, elts, n_elts * 4);
 187     radeon_bo_unmap(rmesa->state.elt_dma_bo);
 188 }
 189
 190 static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
 191 {
 192         BATCH_LOCALS(rmesa);
 193
 194     if (vertex_count > 0) {
 195         BEGIN_BATCH(8);
 196         OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
 197         OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
 198                   ((vertex_count + 0) << 16) |
 199                   type |
 200                   R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
 201
 202         OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
 203             OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
 204         OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo,
 205                         rmesa->state.elt_dma_offset, 0);
 206         OUT_BATCH(vertex_count);
 207         END_BATCH();
 208     }
 209 }
 210
 211 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
 212 {
 213         BATCH_LOCALS(rmesa);
 214     uint32_t voffset;
 215         int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
 216         int i;
 217
 218         if (RADEON_DEBUG & DEBUG_VERTS)
 219                 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
 220                         offset);
 221
 222         BEGIN_BATCH(sz+2);
 223         OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
 224         OUT_BATCH(nr);
 225
 226         for (i = 0; i + 1 < nr; i += 2) {
 227                 OUT_BATCH((rmesa->state.aos[i].components << 0) |
 228                           (rmesa->state.aos[i].stride << 8) |
 229                           (rmesa->state.aos[i + 1].components << 16) |
 230                           (rmesa->state.aos[i + 1].stride << 24));
 231
 232         voffset =  rmesa->state.aos[i + 0].offset +
 233                    offset * 4 * rmesa->state.aos[i + 0].stride;
 234                 OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo, voffset, 0);
 235         voffset =  rmesa->state.aos[i + 1].offset +
 236                    offset * 4 * rmesa->state.aos[i + 1].stride;
 237                 OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo, voffset, 0);
 238         }
 239
 240         if (nr & 1) {
 241                 OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
 242                           (rmesa->state.aos[nr - 1].stride << 8));
 243                 OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo,
 244                         rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0);
 245         }
 246         END_BATCH();
 247 }
 248
 249 static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
 250 {
 251         BATCH_LOCALS(rmesa);
 252
 253         BEGIN_BATCH(3);
 254         OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
 255         OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
 256         END_BATCH();
 257 }
 258
 259 static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
 260                                    int start, int end, int prim)
 261 {
 262         BATCH_LOCALS(rmesa);
 263         int type, num_verts;
 264         TNLcontext *tnl = TNL_CONTEXT(ctx);
 265         struct vertex_buffer *vb = &tnl->vb;
 266
 267         type = r300PrimitiveType(rmesa, prim);
 268         num_verts = r300NumVerts(rmesa, end - start, prim);
 269
 270         if (type < 0 || num_verts <= 0)
 271                 return;
 272
 273         /* Make space for at least 64 dwords.
 274          * This is supposed to ensure that we can get all rendering
 275          * commands into a single command buffer.
 276          */
 277         r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);
 278
 279         if (vb->Elts) {
 280                 if (num_verts > 65535) {
 281                         /* not implemented yet */
 282                         WARN_ONCE("Too many elts\n");
 283                         return;
 284                 }
 285                 /* Note: The following is incorrect, but it's the best I can do
 286                  * without a major refactoring of how DMA memory is handled.
 287                  * The problem: Ensuring that both vertex arrays *and* index
 288                  * arrays are at the right position, and then ensuring that
 289                  * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
 290                  * at once.
 291                  *
 292                  * So why is the following incorrect? Well, it seems like
 293                  * allocating the index array might actually evict the vertex
 294                  * arrays. *sigh*
 295                  */
 296                 r300EmitElts(ctx, vb->Elts, num_verts);
 297                 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
 298                 r300FireEB(rmesa, num_verts, type);
 299         } else {
 300                 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
 301                 r300FireAOS(rmesa, num_verts, type);
 302         }
 303         COMMIT_BATCH();
 304 }
 305
 306 static GLboolean r300RunRender(GLcontext * ctx,
 307                                struct tnl_pipeline_stage *stage)
 308 {
 309         r300ContextPtr rmesa = R300_CONTEXT(ctx);
 310         int i;
 311         TNLcontext *tnl = TNL_CONTEXT(ctx);
 312         struct vertex_buffer *vb = &tnl->vb;
 313
 314         if (RADEON_DEBUG & DEBUG_PRIMS)
 315                 fprintf(stderr, "%s\n", __FUNCTION__);
 316
 317         r300UpdateShaders(rmesa);
 318         if (r300EmitArrays(ctx))
 319                 return GL_TRUE;
 320
 321         r300UpdateShaderStates(rmesa);
 322
 323         r300EmitCacheFlush(rmesa);
 324         r300EmitState(rmesa);
 325
 326         for (i = 0; i < vb->PrimitiveCount; i++) {
 327                 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
 328                 GLuint start = vb->Primitive[i].start;
 329                 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
 330                 r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
 331         }
 332
 333         r300EmitCacheFlush(rmesa);
 334
 335         r300ReleaseArrays(ctx);
 336
 337         return GL_FALSE;
 338 }
 339
 340 #define FALLBACK_IF(expr)                                               \
 341         do {                                                            \
 342                 if (expr) {                                             \
 343                         if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)        \
 344                                 WARN_ONCE("Software fallback:%s\n",     \
 345                                           #expr);                       \
 346                         return R300_FALLBACK_RAST;                      \
 347                 }                                                       \
 348         } while(0)
 349
 350 static int r300Fallback(GLcontext * ctx)
 351 {
 352         r300ContextPtr r300 = R300_CONTEXT(ctx);
 353         /* Do we need to use new-style shaders?
 354          * Also is there a better way to do this? */
 355         if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
 356                 struct r500_fragment_program *fp = (struct r500_fragment_program *)
 357             (char *)ctx->FragmentProgram._Current;
 358                 if (fp) {
 359                         if (!fp->translated) {
 360                                 r500TranslateFragmentShader(r300, fp);
 361                                 FALLBACK_IF(!fp->translated);
 362                         }
 363                 }
 364         } else {
 365                 struct r300_fragment_program *fp = (struct r300_fragment_program *)
 366             (char *)ctx->FragmentProgram._Current;
 367                 if (fp) {
 368                         if (!fp->translated) {
 369                                 r300TranslateFragmentShader(r300, fp);
 370                                 FALLBACK_IF(!fp->translated);
 371                         }
 372                 }
 373         }
 374
 375         FALLBACK_IF(ctx->RenderMode != GL_RENDER);
 376
 377         FALLBACK_IF(ctx->Stencil._TestTwoSide
 378                     && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
 379                         || ctx->Stencil.ValueMask[0] !=
 380                         ctx->Stencil.ValueMask[1]
 381                         || ctx->Stencil.WriteMask[0] !=
 382                         ctx->Stencil.WriteMask[1]));
 383
 384         if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
 385                 FALLBACK_IF(ctx->Point.PointSprite);
 386
 387         if (!r300->disable_lowimpact_fallback) {
 388                 FALLBACK_IF(ctx->Polygon.StippleFlag);
 389                 FALLBACK_IF(ctx->Multisample._Enabled);
 390                 FALLBACK_IF(ctx->Line.StippleFlag);
 391                 FALLBACK_IF(ctx->Line.SmoothFlag);
 392                 FALLBACK_IF(ctx->Point.SmoothFlag);
 393         }
 394
 395         return R300_FALLBACK_NONE;
 396 }
 397
 398 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
 399                                      struct tnl_pipeline_stage *stage)
 400 {
 401         r300ContextPtr rmesa = R300_CONTEXT(ctx);
 402
 403         if (RADEON_DEBUG & DEBUG_PRIMS)
 404                 fprintf(stderr, "%s\n", __FUNCTION__);
 405
 406         if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
 407                 return GL_TRUE;
 408
 409         if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
 410                 return GL_TRUE;
 411
 412         return r300RunRender(ctx, stage);
 413 }
 414
 415 static GLboolean r300RunTCLRender(GLcontext * ctx,
 416                                   struct tnl_pipeline_stage *stage)
 417 {
 418         r300ContextPtr rmesa = R300_CONTEXT(ctx);
 419         struct r300_vertex_program *vp;
 420
 421         hw_tcl_on = future_hw_tcl_on;
 422
 423         if (RADEON_DEBUG & DEBUG_PRIMS)
 424                 fprintf(stderr, "%s\n", __FUNCTION__);
 425
 426         if (hw_tcl_on == GL_FALSE)
 427                 return GL_TRUE;
 428
 429         if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
 430                 hw_tcl_on = GL_FALSE;
 431                 return GL_TRUE;
 432         }
 433
 434         r300UpdateShaders(rmesa);
 435
 436         vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
 437         if (vp->native == GL_FALSE) {
 438                 hw_tcl_on = GL_FALSE;
 439                 return GL_TRUE;
 440         }
 441
 442         return r300RunRender(ctx, stage);
 443 }
 444
 445 const struct tnl_pipeline_stage _r300_render_stage = {
 446         "r300 Hardware Rasterization",
 447         NULL,
 448         NULL,
 449         NULL,
 450         NULL,
 451         r300RunNonTCLRender
 452 };
 453
 454 const struct tnl_pipeline_stage _r300_tcl_stage = {
 455         "r300 Hardware Transform, Clipping and Lighting",
 456         NULL,
 457         NULL,
 458         NULL,
 459         NULL,
 460         r300RunTCLRender
 461 };