/*
 * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * Authors:
 *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"

#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"

#include "r600_context.h"
#include "r600_cmdbuf.h"

#include "r600_tex.h"

#include "r700_vertprog.h"
#include "r700_fragprog.h"
#include "r700_state.h"

#include "radeon_common_context.h"

void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
GLboolean r700SendTextureState(context_t *context);
/* returns -1 on failure, so the return type must be signed */
static int r700PrimitiveType(int prim);
void r600UpdateTextureState(GLcontext * ctx);
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type);

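/* Emit a WAIT_UNTIL with WAIT_3D_IDLE set so the CP stalls until the
 * 3D engine has drained.
 */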
void r700WaitForIdle(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    BEGIN_BATCH_NO_AUTOSTATE(3);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit);

    END_BATCH();
    COMMIT_BATCH();
}

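/* Flush and invalidate the 3D caches with an event write, then wait
 * until the engine is both idle and clean.
 */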
void r700WaitForIdleClean(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    BEGIN_BATCH_NO_AUTOSTATE(5);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
    R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);

    END_BATCH();
    COMMIT_BATCH();
}

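/* Begin a 3D command stream: emit START_3D_CMDBUF on pre-RV770 parts,
 * reinitialize context control, and leave the engine idle and clean
 * before state emission starts.
 */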
void r700Start3D(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        BEGIN_BATCH_NO_AUTOSTATE(2);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
        R600_OUT_BATCH(0);
        END_BATCH();
    }

    BEGIN_BATCH_NO_AUTOSTATE(3);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
    R600_OUT_BATCH(0x80000000);
    R600_OUT_BATCH(0x80000000);
    END_BATCH();

    COMMIT_BATCH();

    r700WaitForIdleClean(context);
}

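/* Emit a SURFACE_SYNC packet for the given buffer object so previously
 * written data is coherent before the next consumer touches it.
 * sync_type carries the CP_COHER_CNTL action/destination bits.
 */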
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type)
{
    BATCH_LOCALS(&context->radeon);
    uint32_t cp_coher_size;

    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    if (!pbo)
        return GL_FALSE;

    /* CP_COHER_SIZE is in units of 256 bytes */
    if (pbo->size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((pbo->size + 255) >> 8);

    BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
    R600_OUT_BATCH(sync_type);      /* CP_COHER_CNTL */
    R600_OUT_BATCH(cp_coher_size);  /* CP_COHER_SIZE */
    R600_OUT_BATCH(0);              /* CP_COHER_BASE */
    R600_OUT_BATCH(10);             /* poll interval */
    R600_OUT_BATCH_RELOC(0,
                         pbo,
                         0,
                         read_domain, write_domain, 0);
    END_BATCH();
    COMMIT_BATCH();

    return GL_TRUE;
}

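/* Translate a TNL primitive mode (GL enum) into the VGT DI_PT_* draw
 * initiator primitive type.  Returns -1 for unknown modes.
 */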
static int r700PrimitiveType(int prim)
{
    switch (prim & PRIM_MODE_MASK)
    {
    case GL_POINTS:
        return DI_PT_POINTLIST;
    case GL_LINES:
        return DI_PT_LINELIST;
    case GL_LINE_STRIP:
        return DI_PT_LINESTRIP;
    case GL_LINE_LOOP:
        return DI_PT_LINELOOP;
    case GL_TRIANGLES:
        return DI_PT_TRILIST;
    case GL_TRIANGLE_STRIP:
        return DI_PT_TRISTRIP;
    case GL_TRIANGLE_FAN:
        return DI_PT_TRIFAN;
    case GL_QUADS:
        return DI_PT_QUADLIST;
    case GL_QUAD_STRIP:
        return DI_PT_QUADSTRIP;
    case GL_POLYGON:
        return DI_PT_POLYGON;
    default:
        assert(0);
        return -1;
    }
}

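/* Clamp a vertex count to what the primitive type can actually consume
 * (e.g. a multiple of 3 for GL_TRIANGLES), discarding trailing vertices
 * that cannot form a complete primitive.
 */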
static int r700NumVerts(int num_verts, int prim)
{
    int verts_off = 0;

    switch (prim & PRIM_MODE_MASK) {
    case GL_POINTS:
        verts_off = 0;
        break;
    case GL_LINES:
        verts_off = num_verts % 2;
        break;
    case GL_LINE_STRIP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_LINE_LOOP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_TRIANGLES:
        verts_off = num_verts % 3;
        break;
    case GL_TRIANGLE_STRIP:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_TRIANGLE_FAN:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_QUADS:
        verts_off = num_verts % 4;
        break;
    case GL_QUAD_STRIP:
        if (num_verts < 4)
            verts_off = num_verts;
        else
            verts_off = num_verts % 2;
        break;
    case GL_POLYGON:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    default:
        assert(0);
        return -1;
    }

    return num_verts - verts_off;
}

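/* Emit one primitive as an immediate-mode draw: set VGT_PRIMITIVE_TYPE
 * and the index type, then stream the indices (vb->Elts when indexed,
 * otherwise a linear sequence) inside a DRAW_INDEX_IMMD packet.
 */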
static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
    context_t *context = R700_CONTEXT(ctx);
    BATCH_LOCALS(&context->radeon);
    int type, i, total_emit;
    int num_indices;
    uint32_t vgt_draw_initiator = 0;
    uint32_t vgt_index_type = 0;
    uint32_t vgt_primitive_type = 0;
    uint32_t vgt_num_indices = 0;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;

    type = r700PrimitiveType(prim);
    num_indices = r700NumVerts(end - start, prim);

    radeon_print(RADEON_RENDER, RADEON_TRACE,
                 "%s type %x num_indices %d\n",
                 __func__, type, num_indices);

    if (type < 0 || num_indices <= 0)
        return;

    total_emit = 3  /* VGT_PRIMITIVE_TYPE */
               + 2  /* VGT_INDEX_TYPE */
               + 2  /* NUM_INSTANCES */
               + num_indices + 3;  /* DRAW_INDEX_IMMD */

    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
    /* prim */
    SETfield(vgt_primitive_type, type,
             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(vgt_primitive_type);

    /* index type */
    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
    R600_OUT_BATCH(vgt_index_type);

    /* num instances */
    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
    R600_OUT_BATCH(1);

    /* draw packet */
    vgt_num_indices = num_indices;
    SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
    R600_OUT_BATCH(vgt_num_indices);
    R600_OUT_BATCH(vgt_draw_initiator);

    for (i = start; i < (start + num_indices); i++) {
        if (vb->Elts)
            R600_OUT_BATCH(vb->Elts[i]);
        else
            R600_OUT_BATCH(i);
    }
    END_BATCH();
    COMMIT_BATCH();
}

/* start 3d (10), idle (5), cb/db flush (14) */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)

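/* Estimate the number of command-buffer dwords this draw will need
 * (fixed overhead plus per-primitive cost plus state), and make sure
 * the buffer has room, flushing first if necessary.
 */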
static GLuint r700PredictRenderSize(GLcontext* ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct r700_vertex_program *vp = context->selected_vp;
    struct vertex_buffer *vb = &tnl->vb;
    GLboolean flushed;
    GLuint dwords, i;
    GLuint state_size;

    /* precalculate the aos count so state-size prediction works */
    context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);

    dwords = PRE_EMIT_STATE_BUFSZ;
    for (i = 0; i < vb->PrimitiveCount; i++)
        dwords += vb->Primitive[i].count + 10;
    state_size = radeonCountStateEmitSize(&context->radeon);
    flushed = rcommonEnsureCmdBufSpace(&context->radeon,
                                       dwords + state_size, __func__);

    /* a flush dirties all state, so recount its emit size */
    if (flushed)
        dwords += radeonCountStateEmitSize(&context->radeon);
    else
        dwords += state_size;

    radeon_print(RADEON_RENDER, RADEON_VERBOSE,
                 "%s: total prediction size is %d.\n", __func__, dwords);
    return dwords;
}

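/* Top of the hardware draw path: validate and emit GPU state, walk the
 * TNL primitive list drawing each primitive, then emit the cache
 * flushes the color and depth buffers need.
 */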
static GLboolean r700RunRender(GLcontext * ctx,
                               struct tnl_pipeline_stage *stage)
{
    context_t *context = R700_CONTEXT(ctx);
    radeonContextPtr radeon = &context->radeon;
    unsigned int i, id = 0;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;
    struct radeon_renderbuffer *rrb;
    GLuint emit_end;

    radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
                 __func__, context->radeon.cmdbuf.cs->cdw);

    /* always emit CB base to prevent
     * lock ups on some chips
     */
    R600_STATECHANGE(context, cb_target);
    /* mark vtx as dirty since it changes per-draw */
    R600_STATECHANGE(context, vtx);

    r700SetScissor(context);
    r700SetupVertexProgram(ctx);
    r700SetupFragmentProgram(ctx);
    r600UpdateTextureState(ctx);

    emit_end = r700PredictRenderSize(ctx)
             + context->radeon.cmdbuf.cs->cdw;
    r700SetupStreams(ctx);

    radeonEmitState(radeon);

    radeon_debug_add_indent();
    for (i = 0; i < vb->PrimitiveCount; i++) {
        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
        GLuint start = vb->Primitive[i].start;
        GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
        r700RunRenderPrimitive(ctx, start, end, prim);
    }
    radeon_debug_remove_indent();

    /* Flush render ops cached for the last several quads. */
    r700WaitForIdleClean(context);

    rrb = radeon_get_colorbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     CB_ACTION_ENA_bit | (1 << (id + 6)));

    rrb = radeon_get_depthbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);

    radeonReleaseArrays(ctx, ~0);

    radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
                 __func__, context->radeon.cmdbuf.cs->cdw);

    if (emit_end < context->radeon.cmdbuf.cs->cdw)
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n",
                  context->radeon.cmdbuf.cs->cdw - emit_end);

    return GL_FALSE;
}

static GLboolean r700RunNonTCLRender(GLcontext * ctx,
                                     struct tnl_pipeline_stage *stage)
{
    GLboolean bRet = GL_TRUE;

    return bRet;
}

static GLboolean r700RunTCLRender(GLcontext * ctx,
                                  struct tnl_pipeline_stage *stage)
{
    GLboolean bRet = GL_FALSE;

    /* TODO : sw fallback */

    /* Need the shader BOs set up before the buffer check. */
    r700UpdateShaders(ctx);

    /* Ensure all enabled and complete textures are uploaded, along
     * with any buffers being used.
     */
    if (!r600ValidateBuffers(ctx))
    {
        return GL_TRUE;
    }

    bRet = r700RunRender(ctx, stage);

    /* GL_FALSE stops _tnl_run_pipeline from running the remaining
     * pipeline stages.  The render above finishes the whole pipeline,
     * so GL_FALSE is the success return here.
     */
    return bRet;
}

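/* TNL pipeline stage descriptors.  Only the run callback is used by
 * these stages; the remaining hooks are unneeded and left NULL.
 */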
const struct tnl_pipeline_stage _r700_render_stage = {
    "r700 Hardware Rasterization",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunNonTCLRender
};

const struct tnl_pipeline_stage _r700_tcl_stage = {
    "r700 Hardware Transform, Clipping and Lighting",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunTCLRender
};

const struct tnl_pipeline_stage *r700_pipeline[] =
{
    &_r700_tcl_stage,
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
    &_tnl_fog_coordinate_stage,
    &_tnl_texgen_stage,
    &_tnl_texture_transform_stage,
    &_tnl_vertex_program_stage,

    &_r700_render_stage,
    &_tnl_render_stage,
    NULL,
};