r600: fix copy/paste typo
mesa.git: src/mesa/drivers/dri/r600/r700_render.c
/*
 * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * Authors:
 *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"

#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
#include "vbo/vbo_context.h"

#include "r600_context.h"
#include "r600_cmdbuf.h"

#include "r600_tex.h"

#include "r700_vertprog.h"
#include "r700_fragprog.h"
#include "r700_state.h"

#include "radeon_buffer_objects.h"
#include "radeon_common_context.h"

void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
GLboolean r700SendTextureState(context_t *context);
static unsigned int r700PrimitiveType(int prim);
void r600UpdateTextureState(GLcontext * ctx);
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type);

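/* Note on the batch macros used throughout this file:
 * BEGIN_BATCH_NO_AUTOSTATE(n) opens a command-stream batch of exactly
 * n dwords, each R600_OUT_BATCH() emits one dword, and a relocation
 * emitted with R600_OUT_BATCH_RELOC() is budgeted as two dwords here
 * (hence the "5 + 2" sizing in r700SyncSurf below).  END_BATCH()
 * closes the batch and COMMIT_BATCH() hands it to the command buffer. */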
void r700WaitForIdle(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
    BEGIN_BATCH_NO_AUTOSTATE(3);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit);

    END_BATCH();
    COMMIT_BATCH();
}

void r700WaitForIdleClean(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
    BEGIN_BATCH_NO_AUTOSTATE(5);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
    R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);

    END_BATCH();
    COMMIT_BATCH();
}

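/* r700Start3D emits the per-command-buffer 3D preamble: chips older
 * than RV770 first need a START_3D_CMDBUF packet; the two 0x80000000
 * dwords in CONTEXT_CONTROL appear to set the top (enable) bit of the
 * context and shadow register load masks. */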
void r700Start3D(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        BEGIN_BATCH_NO_AUTOSTATE(2);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
        R600_OUT_BATCH(0);
        END_BATCH();
    }

    BEGIN_BATCH_NO_AUTOSTATE(3);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
    R600_OUT_BATCH(0x80000000);
    R600_OUT_BATCH(0x80000000);
    END_BATCH();

    COMMIT_BATCH();

    r700WaitForIdleClean(context);
}

GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type)
{
    BATCH_LOCALS(&context->radeon);
    uint32_t cp_coher_size;

    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    if (!pbo)
        return GL_FALSE;

    if (pbo->size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((pbo->size + 255) >> 8); /* size in 256-byte units */

    BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
    R600_OUT_BATCH(sync_type);
    R600_OUT_BATCH(cp_coher_size);
    R600_OUT_BATCH(0);  /* CP_COHER_BASE */
    R600_OUT_BATCH(10); /* poll interval */
    R600_OUT_BATCH_RELOC(0,
                         pbo,
                         0,
                         read_domain, write_domain, 0);
    END_BATCH();
    COMMIT_BATCH();

    return GL_TRUE;
}

static unsigned int r700PrimitiveType(int prim)
{
    switch (prim & PRIM_MODE_MASK)
    {
    case GL_POINTS:
        return DI_PT_POINTLIST;
    case GL_LINES:
        return DI_PT_LINELIST;
    case GL_LINE_STRIP:
        return DI_PT_LINESTRIP;
    case GL_LINE_LOOP:
        return DI_PT_LINELOOP;
    case GL_TRIANGLES:
        return DI_PT_TRILIST;
    case GL_TRIANGLE_STRIP:
        return DI_PT_TRISTRIP;
    case GL_TRIANGLE_FAN:
        return DI_PT_TRIFAN;
    case GL_QUADS:
        return DI_PT_QUADLIST;
    case GL_QUAD_STRIP:
        return DI_PT_QUADSTRIP;
    case GL_POLYGON:
        return DI_PT_POLYGON;
    default:
        assert(0);
        return -1;
    }
}

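/* r700NumVerts trims the vertex count so only complete primitives are
 * submitted: leftover vertices that cannot form another point, line,
 * triangle or quad are dropped rather than sent to the hardware. */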
static int r700NumVerts(int num_verts, int prim)
{
    int verts_off = 0;

    switch (prim & PRIM_MODE_MASK) {
    case GL_POINTS:
        verts_off = 0;
        break;
    case GL_LINES:
        verts_off = num_verts % 2;
        break;
    case GL_LINE_STRIP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_LINE_LOOP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_TRIANGLES:
        verts_off = num_verts % 3;
        break;
    case GL_TRIANGLE_STRIP:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_TRIANGLE_FAN:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_QUADS:
        verts_off = num_verts % 4;
        break;
    case GL_QUAD_STRIP:
        if (num_verts < 4)
            verts_off = num_verts;
        else
            verts_off = num_verts % 2;
        break;
    case GL_POLYGON:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    default:
        assert(0);
        return -1;
    }

    return num_verts - verts_off;
}

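/* The draw path below programs VGT_PRIMITIVE_TYPE, the index size and
 * the instance count, then issues a DRAW_INDEX_IMMD packet with the
 * indices embedded directly in the command stream (taken from
 * vb->Elts, generated sequentially, or copied from a host-side index
 * buffer). */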
static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
    context_t *context = R700_CONTEXT(ctx);
    BATCH_LOCALS(&context->radeon);
    int type, i, total_emit;
    int num_indices;
    uint32_t vgt_draw_initiator = 0;
    uint32_t vgt_index_type     = 0;
    uint32_t vgt_primitive_type = 0;
    uint32_t vgt_num_indices    = 0;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;

    type = r700PrimitiveType(prim);
    num_indices = r700NumVerts(end - start, prim);

    radeon_print(RADEON_RENDER, RADEON_TRACE,
                 "%s type %x num_indices %d\n",
                 __func__, type, num_indices);

    if (type < 0 || num_indices <= 0)
        return;

    total_emit = 3                 /* VGT_PRIMITIVE_TYPE */
               + 2                 /* VGT_INDEX_TYPE */
               + 2                 /* NUM_INSTANCES */
               + num_indices + 3;  /* DRAW_INDEX_IMMD */

    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
    /* prim */
    SETfield(vgt_primitive_type, type,
             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(vgt_primitive_type);

    /* index type */
    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
    R600_OUT_BATCH(vgt_index_type);

    /* num instances */
    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
    R600_OUT_BATCH(1);

    /* draw packet */
    vgt_num_indices = num_indices;
    SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
    R600_OUT_BATCH(vgt_num_indices);
    R600_OUT_BATCH(vgt_draw_initiator);

    if (NULL == context->ind_buf.bo)
    {
        for (i = start; i < (start + num_indices); i++)
        {
            if (vb->Elts)
            {
                R600_OUT_BATCH(vb->Elts[i]);
            }
            else
                R600_OUT_BATCH(i);
        }
    }
    else
    {
        if (GL_TRUE == context->ind_buf.bHostIb)
        {
            if (GL_TRUE != context->ind_buf.is_32bit)
            {
                GLushort *pIndex = (GLushort*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
                pIndex += start;
                for (i = 0; i < num_indices; i++)
                {
                    R600_OUT_BATCH(*pIndex);
                    pIndex++;
                }
            }
            else
            {
                GLuint *pIndex = (GLuint*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
                pIndex += start;

                for (i = 0; i < num_indices; i++)
                {
                    R600_OUT_BATCH(*pIndex);
                    pIndex++;
                }
            }
        }
        else
        {
            /* TODO : hw ib draw */
        }
    }

    END_BATCH();
    COMMIT_BATCH();
}

/* start 3d, idle, cb/db flush */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)

static GLuint r700PredictRenderSize(GLcontext* ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct r700_vertex_program *vp = context->selected_vp;
    struct vertex_buffer *vb = &tnl->vb;
    GLboolean flushed;
    GLuint dwords, i;
    GLuint state_size;

    /* pre calculate aos count so state prediction works */
    context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);

    dwords = PRE_EMIT_STATE_BUFSZ;
    for (i = 0; i < vb->PrimitiveCount; i++)
        dwords += vb->Primitive[i].count + 10;
    state_size = radeonCountStateEmitSize(&context->radeon);
    flushed = rcommonEnsureCmdBufSpace(&context->radeon,
                                       dwords + state_size, __FUNCTION__);

    if (flushed)
        dwords += radeonCountStateEmitSize(&context->radeon);
    else
        dwords += state_size;

    radeon_print(RADEON_RENDER, RADEON_VERBOSE,
                 "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
    return dwords;
}

static GLboolean r700RunRender(GLcontext * ctx,
                               struct tnl_pipeline_stage *stage)
{
    context_t *context = R700_CONTEXT(ctx);
    radeonContextPtr radeon = &context->radeon;
    unsigned int i, id = 0;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;
    struct radeon_renderbuffer *rrb;

    radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
                 __func__, context->radeon.cmdbuf.cs->cdw);

    /* always emit CB base to prevent
     * lock ups on some chips.
     */
    R600_STATECHANGE(context, cb_target);
    /* mark vtx as dirty since it changes per-draw */
    R600_STATECHANGE(context, vtx);

    r700SetScissor(context);
    r700SetupVertexProgram(ctx);
    r700SetupFragmentProgram(ctx);
    r600UpdateTextureState(ctx);

    GLuint emit_end = r700PredictRenderSize(ctx)
                    + context->radeon.cmdbuf.cs->cdw;
    r700SetupStreams(ctx);

    radeonEmitState(radeon);

    radeon_debug_add_indent();
    /* richard test code */
    for (i = 0; i < vb->PrimitiveCount; i++)
    {
        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
        GLuint start = vb->Primitive[i].start;
        GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
        r700RunRenderPrimitive(ctx, start, end, prim);
    }
    radeon_debug_remove_indent();

    /* Flush render ops cached for the last several quads. */
    r700WaitForIdleClean(context);

    rrb = radeon_get_colorbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     CB_ACTION_ENA_bit | (1 << (id + 6)));

    rrb = radeon_get_depthbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);

    radeonReleaseArrays(ctx, ~0);

    radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
                 __func__, context->radeon.cmdbuf.cs->cdw);

    if (emit_end < context->radeon.cmdbuf.cs->cdw)
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n",
                  context->radeon.cmdbuf.cs->cdw - emit_end);

    return GL_FALSE;
}

static GLboolean r700RunNonTCLRender(GLcontext * ctx,
                                     struct tnl_pipeline_stage *stage) /* -------------------- */
{
    GLboolean bRet = GL_TRUE;

    return bRet;
}

static GLboolean r700RunTCLRender(GLcontext * ctx,  /*----------------------*/
                                  struct tnl_pipeline_stage *stage)
{
    GLboolean bRet = GL_FALSE;

    /* TODO : sw fallback */

    /* Need shader bo's setup before bo check */
    r700UpdateShaders(ctx);

    /* Ensure all enabled and complete textures are uploaded, along with
     * any buffers being used.
     */
    if (!r600ValidateBuffers(ctx))
    {
        return GL_TRUE;
    }

    bRet = r700RunRender(ctx, stage);

    /* GL_FALSE stops _tnl_run_pipeline from running the remaining pipe
     * stages.  The render here DOES finish the whole pipe, so GL_FALSE
     * is returned on success. */
    return bRet;
}

const struct tnl_pipeline_stage _r700_render_stage = {
    "r700 Hardware Rasterization",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunNonTCLRender
};

const struct tnl_pipeline_stage _r700_tcl_stage = {
    "r700 Hardware Transform, Clipping and Lighting",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunTCLRender
};

const struct tnl_pipeline_stage *r700_pipeline[] =
{
    &_r700_tcl_stage,
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
    &_tnl_fog_coordinate_stage,
    &_tnl_texgen_stage,
    &_tnl_texture_transform_stage,
    &_tnl_vertex_program_stage,

    &_r700_render_stage,
    &_tnl_render_stage,
    0,
};

#define CONVERT( TYPE, MACRO ) do {             \
    GLuint i, j, sz;                            \
    sz = input->Size;                           \
    if (input->Normalized) {                    \
        for (i = 0; i < count; i++) {           \
            const TYPE *in = (TYPE *)src_ptr;   \
            for (j = 0; j < sz; j++) {          \
                *dst_ptr++ = MACRO(*in);        \
                in++;                           \
            }                                   \
            src_ptr += stride;                  \
        }                                       \
    } else {                                    \
        for (i = 0; i < count; i++) {           \
            const TYPE *in = (TYPE *)src_ptr;   \
            for (j = 0; j < sz; j++) {          \
                *dst_ptr++ = (GLfloat)(*in);    \
                in++;                           \
            }                                   \
            src_ptr += stride;                  \
        }                                       \
    }                                           \
} while (0)
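
/* For example, CONVERT(GLshort, SHORT_TO_FLOAT) expands to a loop that
 * reads input->Size GLshorts per vertex from src_ptr, converts each
 * through SHORT_TO_FLOAT() (or a plain (GLfloat) cast when
 * input->Normalized is unset), stores the results to dst_ptr, and
 * advances src_ptr by the array stride.  The macro deliberately leans
 * on the enclosing function's src_ptr, dst_ptr, count and stride
 * locals. */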

/**
 * Convert attribute data to floats.
 * If the attribute uses a named buffer object, replace the bo with a newly
 * allocated bo.
 */
static void r700ConvertAttrib(GLcontext *ctx, int count,
                              const struct gl_client_array *input,
                              struct StreamDesc *attr)
{
    context_t *context = R700_CONTEXT(ctx);
    const GLvoid *src_ptr;
    GLboolean mapped_named_bo = GL_FALSE;
    GLfloat *dst_ptr;
    GLuint stride;

    stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;

    /* Convert value for first element only */
    if (input->StrideB == 0)
    {
        count = 1;
    }

    if (input->BufferObj->Name)
    {
        if (!input->BufferObj->Pointer)
        {
            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
            mapped_named_bo = GL_TRUE;
        }

        src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
    }
    else
    {
        src_ptr = input->Ptr;
    }

    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
                         sizeof(GLfloat) * input->Size * count, 32);
    dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);

    assert(src_ptr != NULL);

    switch (input->Type)
    {
    case GL_DOUBLE:
        CONVERT(GLdouble, (GLfloat));
        break;
    case GL_UNSIGNED_INT:
        CONVERT(GLuint, UINT_TO_FLOAT);
        break;
    case GL_INT:
        CONVERT(GLint, INT_TO_FLOAT);
        break;
    case GL_UNSIGNED_SHORT:
        CONVERT(GLushort, USHORT_TO_FLOAT);
        break;
    case GL_SHORT:
        CONVERT(GLshort, SHORT_TO_FLOAT);
        break;
    case GL_UNSIGNED_BYTE:
        assert(input->Format != GL_BGRA);
        CONVERT(GLubyte, UBYTE_TO_FLOAT);
        break;
    case GL_BYTE:
        CONVERT(GLbyte, BYTE_TO_FLOAT);
        break;
    default:
        assert(0);
        break;
    }

    if (mapped_named_bo)
    {
        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
    }
}

static void r700AlignDataToDword(GLcontext *ctx,
                                 const struct gl_client_array *input,
                                 int count,
                                 struct StreamDesc *attr)
{
    context_t *context = R700_CONTEXT(ctx);
    const int dst_stride = (input->StrideB + 3) & ~3;
    const int size = getTypeSize(input->Type) * input->Size * count;
    GLboolean mapped_named_bo = GL_FALSE;

    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);

    if (!input->BufferObj->Pointer)
    {
        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
        mapped_named_bo = GL_TRUE;
    }

    {
        GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
        GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
        int i;

        for (i = 0; i < count; ++i)
        {
            _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
            src_ptr += input->StrideB;
            dst_ptr += dst_stride;
        }
    }

    if (mapped_named_bo)
    {
        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
    }

    attr->stride = dst_stride;
}

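/* r700SetupStreams2 binds one array-of-structs (aos) per active input.
 * Three cases: (1) types the vertex fetcher cannot consume directly
 * (doubles, 32-bit integers, sub-dword strides, and on big-endian
 * builds anything not 4 bytes wide) are converted to floats; (2) named
 * buffer objects are used in place, or realigned to a dword stride
 * first; (3) everything else is copied into a fresh DMA region with
 * radeonEmitVec4/8/12/16. */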
static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count)
{
    context_t *context = R700_CONTEXT(ctx);
    GLuint stride;
    int ret;
    int i, index;

    R600_STATECHANGE(context, vtx);

    for (index = 0; index < context->nNumActiveAos; index++)
    {
        struct radeon_aos *aos = &context->radeon.tcl.aos[index];
        i = context->stream_desc[index].element;

        stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;

        if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
#if MESA_BIG_ENDIAN
            getTypeSize(input[i]->Type) != 4 ||
#endif
            stride < 4)
        {
            r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
        }
        else
        {
            if (input[i]->BufferObj->Name)
            {
                if (stride % 4 != 0)
                {
                    assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
                    r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
                    context->stream_desc[index].is_named_bo = GL_FALSE;
                }
                else
                {
                    context->stream_desc[index].stride = input[i]->StrideB;
                    context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
                    context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
                    context->stream_desc[index].is_named_bo = GL_TRUE;
                }
            }
            else
            {
                int size;
                int local_count = count;
                uint32_t *dst;

                if (input[i]->StrideB == 0)
                {
                    size = getTypeSize(input[i]->Type) * input[i]->Size;
                    local_count = 1;
                }
                else
                {
                    size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
                }

                radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
                                     &context->stream_desc[index].bo_offset, size, 32);
                assert(context->stream_desc[index].bo->ptr != NULL);
                dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
                                               context->stream_desc[index].bo_offset);

                switch (context->stream_desc[index].dwords)
                {
                case 1:
                    radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    context->stream_desc[index].stride = 4;
                    break;
                case 2:
                    radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    context->stream_desc[index].stride = 8;
                    break;
                case 3:
                    radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    context->stream_desc[index].stride = 12;
                    break;
                case 4:
                    radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    context->stream_desc[index].stride = 16;
                    break;
                default:
                    assert(0);
                    break;
                }
            }
        }

        aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
        aos->stride = context->stream_desc[index].stride / sizeof(float);
        aos->components = context->stream_desc[index].dwords;
        aos->bo = context->stream_desc[index].bo;
        aos->offset = context->stream_desc[index].bo_offset;

        if (context->stream_desc[index].is_named_bo)
        {
            radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
                                              context->stream_desc[index].bo,
                                              RADEON_GEM_DOMAIN_GTT, 0);
        }
    }

    context->radeon.tcl.aos_count = context->nNumActiveAos;
    ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
                                        first_elem(&context->radeon.dma.reserved)->bo,
                                        RADEON_GEM_DOMAIN_GTT, 0);
}

static void r700FreeData(GLcontext *ctx)
{
    /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
     * to prevent double unref in radeonReleaseArrays
     * called during context destroy
     */
    context_t *context = R700_CONTEXT(ctx);

    int i;

    for (i = 0; i < context->nNumActiveAos; i++)
    {
        if (!context->stream_desc[i].is_named_bo)
        {
            radeon_bo_unref(context->stream_desc[i].bo);
        }
        context->radeon.tcl.aos[i].bo = NULL;
    }

    if (context->ind_buf.bo != NULL)
    {
        if (context->ind_buf.bHostIb != GL_TRUE)
        {
            radeon_bo_unref(context->ind_buf.bo);
        }
        else
        {
            FREE(context->ind_buf.bo->ptr);
            FREE(context->ind_buf.bo);
            context->ind_buf.bo = NULL;
        }
    }
}

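/* r700FixupIndexBuffer repacks index formats the hardware path does
 * not take directly: 8-bit indices (and, on big-endian builds, 16-bit
 * ones) are widened to 16 bits and packed in pairs into little-endian
 * dwords. */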
static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
{
    context_t *context = R700_CONTEXT(ctx);
    GLvoid *src_ptr;
    GLuint *out;
    int i;
    GLboolean mapped_named_bo = GL_FALSE;

    if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
    {
        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
        mapped_named_bo = GL_TRUE;
        assert(mesa_ind_buf->obj->Pointer != NULL);
    }
    src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);

    if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
    {
        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
        GLubyte *in = (GLubyte *)src_ptr;

        if (context->ind_buf.bHostIb != GL_TRUE)
        {
            radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                                 &context->ind_buf.bo_offset, size, 4);

            assert(context->ind_buf.bo->ptr != NULL);
            out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
        }
        else
        {
            context->ind_buf.bo = MALLOC_STRUCT(radeon_bo);
            context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4);
            context->ind_buf.bo_offset = 0;
            out = (GLuint *)context->ind_buf.bo->ptr;
        }

        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
        {
            *out++ = in[i] | in[i + 1] << 16;
        }

        if (i < mesa_ind_buf->count)
        {
            *out++ = in[i];
        }

        /* On big-endian builds, 16-bit indices need the same pairwise
         * repacking into little-endian dwords, hence this extra branch. */
#if MESA_BIG_ENDIAN
    }
    else
    { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
        GLushort *in = (GLushort *)src_ptr;
        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);

        if (context->ind_buf.bHostIb != GL_TRUE)
        {
            radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                                 &context->ind_buf.bo_offset, size, 4);

            assert(context->ind_buf.bo->ptr != NULL);
            out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
        }
        else
        {
            context->ind_buf.bo = MALLOC_STRUCT(radeon_bo);
            context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4);
            context->ind_buf.bo_offset = 0;
            out = (GLuint *)context->ind_buf.bo->ptr;
        }

        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
        {
            *out++ = in[i] | in[i + 1] << 16;
        }

        if (i < mesa_ind_buf->count)
        {
            *out++ = in[i];
        }
#endif
    }

    context->ind_buf.is_32bit = GL_FALSE;
    context->ind_buf.count = mesa_ind_buf->count;

    if (mapped_named_bo)
    {
        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
    }
}

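/* bHostIb selects where the index data lives: GL_TRUE means a plain
 * malloc'd host copy wrapped in a radeon_bo struct (freed again in
 * r700FreeData); GL_FALSE would mean a GPU-visible DMA region.  Only
 * the host path is exercised here, since the hardware index-buffer
 * draw is still a TODO in r700RunRenderPrimitive. */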
static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
{
    context_t *context = R700_CONTEXT(ctx);

    if (!mesa_ind_buf) {
        context->ind_buf.bo = NULL;
        return;
    }

    context->ind_buf.bHostIb = GL_TRUE;

#if MESA_BIG_ENDIAN
    if (mesa_ind_buf->type == GL_UNSIGNED_INT)
    {
#else
    if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
    {
#endif
        const GLvoid *src_ptr;
        GLvoid *dst_ptr;
        GLboolean mapped_named_bo = GL_FALSE;

        if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
        {
            ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
            assert(mesa_ind_buf->obj->Pointer != NULL);
            mapped_named_bo = GL_TRUE;
        }

        src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);

        const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);

        if (context->ind_buf.bHostIb != GL_TRUE)
        {
            radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                                 &context->ind_buf.bo_offset, size, 4);
            assert(context->ind_buf.bo->ptr != NULL);
            dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
        }
        else
        {
            context->ind_buf.bo = MALLOC_STRUCT(radeon_bo);
            context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4);
            context->ind_buf.bo_offset = 0;
            dst_ptr = context->ind_buf.bo->ptr;
        }

        _mesa_memcpy(dst_ptr, src_ptr, size);

        context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
        context->ind_buf.count = mesa_ind_buf->count;

        if (mapped_named_bo)
        {
            ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
        }
    }
    else
    {
        r700FixupIndexBuffer(ctx, mesa_ind_buf);
    }
}

static GLboolean r700TryDrawPrims(GLcontext *ctx,
                                  const struct gl_client_array *arrays[],
                                  const struct _mesa_prim *prim,
                                  GLuint nr_prims,
                                  const struct _mesa_index_buffer *ib,
                                  GLuint min_index,
                                  GLuint max_index )
{
    context_t *context = R700_CONTEXT(ctx);
    radeonContextPtr radeon = &context->radeon;
    GLuint i, id = 0;
    GLboolean bValidedbuffer;
    struct radeon_renderbuffer *rrb;

    if (ctx->NewState)
    {
        _mesa_update_state( ctx );
    }

    bValidedbuffer = r600ValidateBuffers(ctx);

    /* always emit CB base to prevent
     * lock ups on some chips.
     */
    R600_STATECHANGE(context, cb_target);
    /* mark vtx as dirty since it changes per-draw */
    R600_STATECHANGE(context, vtx);

    _tnl_UpdateFixedFunctionProgram(ctx);
    r700SetVertexFormat(ctx, arrays, max_index + 1);
    r700SetupStreams2(ctx, arrays, max_index + 1);
    r700UpdateShaders2(ctx);

    r700SetScissor(context);

    r700SetupVertexProgram(ctx);

    r700SetupFragmentProgram(ctx);

    r600UpdateTextureState(ctx);

    GLuint emit_end = r700PredictRenderSize(ctx)
                    + context->radeon.cmdbuf.cs->cdw;

    r700SetupIndexBuffer(ctx, ib);

    radeonEmitState(radeon);

    for (i = 0; i < nr_prims; ++i)
    {
        r700RunRenderPrimitive(ctx,
                               prim[i].start,
                               prim[i].start + prim[i].count,
                               prim[i].mode);
    }

    /* Flush render ops cached for the last several quads. */
    r700WaitForIdleClean(context);

    rrb = radeon_get_colorbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     CB_ACTION_ENA_bit | (1 << (id + 6)));

    rrb = radeon_get_depthbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);

    r700FreeData(ctx);

    if (emit_end < context->radeon.cmdbuf.cs->cdw)
    {
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n",
                  context->radeon.cmdbuf.cs->cdw - emit_end);
    }

    return GL_TRUE;
}

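/* r700DrawPrimsRe recurses through vbo_rebase_prims: when the
 * primitives do not start at index zero, vbo_rebase_prims rebases them
 * and calls back into this function (passed as the draw callback) with
 * min_index == 0, so the fast path below only ever sees rebased data. */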
static void r700DrawPrimsRe(GLcontext *ctx,
                            const struct gl_client_array *arrays[],
                            const struct _mesa_prim *prim,
                            GLuint nr_prims,
                            const struct _mesa_index_buffer *ib,
                            GLboolean index_bounds_valid,
                            GLuint min_index,
                            GLuint max_index)
{
    GLboolean retval = GL_FALSE;

    /* This check should get folded into just the places that
     * min/max index are really needed.
     */
    if (!index_bounds_valid) {
        vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
    }

    if (min_index) {
        vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe );
        return;
    }

    /* Make an attempt at drawing */
    retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);

    /* If it failed, run the tnl pipeline - it should take care of fallbacks */
    if (!retval)
        _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}

static void r700DrawPrims(GLcontext *ctx,
                          const struct gl_client_array *arrays[],
                          const struct _mesa_prim *prim,
                          GLuint nr_prims,
                          const struct _mesa_index_buffer *ib,
                          GLboolean index_bounds_valid,
                          GLuint min_index,
                          GLuint max_index)
{
    context_t *context = R700_CONTEXT(ctx);

    /* For non-indexed drawing, use the tnl pipe. */
    if (!ib)
    {
        context->ind_buf.bo = NULL;

        _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib,
                            index_bounds_valid, min_index, max_index);
        return;
    }

    r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index);
}

void r700InitDraw(GLcontext *ctx)
{
    struct vbo_context *vbo = vbo_context(ctx);

    vbo->draw_prims = r700DrawPrims;
}