src/mesa/drivers/dri/r600/r700_render.c

   1 /*
   2  * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included
  12  * in all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  */
  21
  22 /*
  23  * Authors:
  24  *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
  25  *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
  26  */
  27
  28 #include "main/glheader.h"
  29 #include "main/state.h"
  30 #include "main/imports.h"
  31 #include "main/enums.h"
  32 #include "main/macros.h"
  33 #include "main/context.h"
  34 #include "main/dd.h"
  35 #include "main/simple_list.h"
  36 #include "main/api_arrayelt.h"
  37 #include "swrast/swrast.h"
  38 #include "swrast_setup/swrast_setup.h"
  39 #include "vbo/vbo.h"
  40
  41 #include "tnl/tnl.h"
  42 #include "tnl/t_vp_build.h"
  43 #include "tnl/t_context.h"
  44 #include "tnl/t_vertex.h"
  45 #include "tnl/t_pipeline.h"
  46 #include "vbo/vbo_context.h"
  47
  48 #include "r600_context.h"
  49 #include "r600_cmdbuf.h"
  50
  51 #include "r600_tex.h"
  52
  53 #include "r700_vertprog.h"
  54 #include "r700_fragprog.h"
  55 #include "r700_state.h"
  56
  57 #include "radeon_buffer_objects.h"
  58 #include "radeon_common_context.h"
  59
  60 void r700WaitForIdle(context_t *context);
  61 void r700WaitForIdleClean(context_t *context);
  62 GLboolean r700SendTextureState(context_t *context);
  63 static unsigned int r700PrimitiveType(int prim);
  64 void r600UpdateTextureState(GLcontext * ctx);
  65 GLboolean r700SyncSurf(context_t *context,
  66                        struct radeon_bo *pbo,
  67                        uint32_t read_domain,
  68                        uint32_t write_domain,
  69                        uint32_t sync_type);
  70
  71 void r700WaitForIdle(context_t *context)
  72 {
  73     BATCH_LOCALS(&context->radeon);
  74     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
  75     BEGIN_BATCH_NO_AUTOSTATE(3);
  76
  77     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
  78     R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
  79     R600_OUT_BATCH(WAIT_3D_IDLE_bit);
  80
  81     END_BATCH();
  82     COMMIT_BATCH();
  83 }
  84
  85 void r700WaitForIdleClean(context_t *context)
  86 {
  87     BATCH_LOCALS(&context->radeon);
  88     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
  89     BEGIN_BATCH_NO_AUTOSTATE(5);
  90
  91     R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
  92     R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
  93
  94     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
  95     R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
  96     R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
  97
  98     END_BATCH();
  99     COMMIT_BATCH();
 100 }
 101
 102 void r700Start3D(context_t *context)
 103 {
 104     BATCH_LOCALS(&context->radeon);
 105     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
 106     if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
 107     {
 108         BEGIN_BATCH_NO_AUTOSTATE(2);
 109         R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
 110         R600_OUT_BATCH(0);
 111         END_BATCH();
 112     }
 113
 114     BEGIN_BATCH_NO_AUTOSTATE(3);
 115     R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
 116     R600_OUT_BATCH(0x80000000);
 117     R600_OUT_BATCH(0x80000000);
 118     END_BATCH();
 119
 120     COMMIT_BATCH();
 121
 122     r700WaitForIdleClean(context);
 123 }
 124
 125 GLboolean r700SyncSurf(context_t *context,
 126                        struct radeon_bo *pbo,
 127                        uint32_t read_domain,
 128                        uint32_t write_domain,
 129                        uint32_t sync_type)
 130 {
 131     BATCH_LOCALS(&context->radeon);
 132     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
 133     uint32_t cp_coher_size;
 134
 135     if (!pbo)
 136             return GL_FALSE;
 137
 138     if (pbo->size == 0xffffffff)
 139             cp_coher_size = 0xffffffff;
 140     else
 141             cp_coher_size = ((pbo->size + 255) >> 8);
 142
 143     BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
 144     R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
 145     R600_OUT_BATCH(sync_type);
 146     R600_OUT_BATCH(cp_coher_size);
 147     R600_OUT_BATCH(0);
 148     R600_OUT_BATCH(10);
 149     R600_OUT_BATCH_RELOC(0,
 150                          pbo,
 151                          0,
 152                          read_domain, write_domain, 0);
 153     END_BATCH();
 154     COMMIT_BATCH();
 155
 156     return GL_TRUE;
 157 }
 158
 159 static unsigned int r700PrimitiveType(int prim)
 160 {
 161     switch (prim & PRIM_MODE_MASK)
 162     {
 163     case GL_POINTS:
 164         return DI_PT_POINTLIST;
 165         break;
 166     case GL_LINES:
 167         return DI_PT_LINELIST;
 168         break;
 169     case GL_LINE_STRIP:
 170         return DI_PT_LINESTRIP;
 171         break;
 172     case GL_LINE_LOOP:
 173         return DI_PT_LINELOOP;
 174         break;
 175     case GL_TRIANGLES:
 176         return DI_PT_TRILIST;
 177         break;
 178     case GL_TRIANGLE_STRIP:
 179         return DI_PT_TRISTRIP;
 180         break;
 181     case GL_TRIANGLE_FAN:
 182         return DI_PT_TRIFAN;
 183         break;
 184     case GL_QUADS:
 185         return DI_PT_QUADLIST;
 186         break;
 187     case GL_QUAD_STRIP:
 188         return DI_PT_QUADSTRIP;
 189         break;
 190     case GL_POLYGON:
 191         return DI_PT_POLYGON;
 192         break;
 193     default:
 194         assert(0);
 195         return -1;
 196         break;
 197     }
 198 }
 199
 200 static int r700NumVerts(int num_verts, int prim)
 201 {
 202         int verts_off = 0;
 203
 204         switch (prim & PRIM_MODE_MASK) {
 205         case GL_POINTS:
 206                 verts_off = 0;
 207                 break;
 208         case GL_LINES:
 209                 verts_off = num_verts % 2;
 210                 break;
 211         case GL_LINE_STRIP:
 212                 if (num_verts < 2)
 213                         verts_off = num_verts;
 214                 break;
 215         case GL_LINE_LOOP:
 216                 if (num_verts < 2)
 217                         verts_off = num_verts;
 218                 break;
 219         case GL_TRIANGLES:
 220                 verts_off = num_verts % 3;
 221                 break;
 222         case GL_TRIANGLE_STRIP:
 223                 if (num_verts < 3)
 224                         verts_off = num_verts;
 225                 break;
 226         case GL_TRIANGLE_FAN:
 227                 if (num_verts < 3)
 228                         verts_off = num_verts;
 229                 break;
 230         case GL_QUADS:
 231                 verts_off = num_verts % 4;
 232                 break;
 233         case GL_QUAD_STRIP:
 234                 if (num_verts < 4)
 235                         verts_off = num_verts;
 236                 else
 237                         verts_off = num_verts % 2;
 238                 break;
 239         case GL_POLYGON:
 240                 if (num_verts < 3)
 241                         verts_off = num_verts;
 242                 break;
 243         default:
 244                 assert(0);
 245                 return -1;
 246                 break;
 247         }
 248
 249         return num_verts - verts_off;
 250 }
 251
 252 static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
 253 {
 254     context_t *context = R700_CONTEXT(ctx);
 255     BATCH_LOCALS(&context->radeon);
 256     int type, i, total_emit;
 257     int num_indices;
 258     uint32_t vgt_draw_initiator = 0;
 259     uint32_t vgt_index_type     = 0;
 260     uint32_t vgt_primitive_type = 0;
 261     uint32_t vgt_num_indices    = 0;
 262     TNLcontext *tnl = TNL_CONTEXT(ctx);
 263     struct vertex_buffer *vb = &tnl->vb;
 264     GLboolean bUseDrawIndex;
 265
 266     if(NULL != context->ind_buf.bo)
 267     {
 268         bUseDrawIndex = GL_TRUE;
 269     }
 270     else
 271     {
 272         bUseDrawIndex = GL_FALSE;
 273     }
 274
 275     type = r700PrimitiveType(prim);
 276     num_indices = r700NumVerts(end - start, prim);
 277
 278     radeon_print(RADEON_RENDER, RADEON_TRACE,
 279                  "%s type %x num_indices %d\n",
 280                  __func__, type, num_indices);
 281
 282     if (type < 0 || num_indices <= 0)
 283             return;
 284
 285     if(GL_TRUE == bUseDrawIndex)
 286     {
 287         total_emit =   3  /* VGT_PRIMITIVE_TYPE */
 288                      + 2  /* VGT_INDEX_TYPE */
 289                      + 2  /* NUM_INSTANCES */
 290                      + 5 + 2; /* DRAW_INDEX */
 291     }
 292     else
 293     {
 294         total_emit =   3 /* VGT_PRIMITIVE_TYPE */
 295                      + 2 /* VGT_INDEX_TYPE */
 296                      + 2 /* NUM_INSTANCES */
 297                      + num_indices + 3; /* DRAW_INDEX_IMMD */
 298     }
 299
 300     BEGIN_BATCH_NO_AUTOSTATE(total_emit);
 301     // prim
 302     SETfield(vgt_primitive_type, type,
 303              VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
 304     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
 305     R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
 306     R600_OUT_BATCH(vgt_primitive_type);
 307
 308         // index type
 309     SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 310
 311     if(GL_TRUE == bUseDrawIndex)
 312     {
 313         if(GL_TRUE != context->ind_buf.is_32bit)
 314         {
 315             SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 316         }
 317     }
 318
 319     R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
 320     R600_OUT_BATCH(vgt_index_type);
 321
 322     // num instances
 323     R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
 324     R600_OUT_BATCH(1);
 325
 326     // draw packet
 327     vgt_num_indices = num_indices;
 328
 329     if(GL_TRUE == bUseDrawIndex)
 330     {
 331         SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
 332     }
 333     else
 334     {
 335         SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
 336     }
 337
 338         SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
 339
 340     if(GL_TRUE == bUseDrawIndex)
 341     {
 342         R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
 343         R600_OUT_BATCH(context->ind_buf.bo_offset);
 344         R600_OUT_BATCH(0);
 345         R600_OUT_BATCH(vgt_num_indices);
 346         R600_OUT_BATCH(vgt_draw_initiator);
 347         R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
 348                              context->ind_buf.bo,
 349                              context->ind_buf.bo_offset,
 350                              RADEON_GEM_DOMAIN_GTT, 0, 0);
 351     }
 352     else
 353     {
 354         R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
 355         R600_OUT_BATCH(vgt_num_indices);
 356         R600_OUT_BATCH(vgt_draw_initiator);
 357
 358         for (i = start; i < (start + num_indices); i++)
 359         {
 360             if(vb->Elts)
 361             {
 362                 R600_OUT_BATCH(vb->Elts[i]);
 363             }
 364             else
 365             {
 366                 R600_OUT_BATCH(i);
 367             }
 368         }
 369     }
 370
 371     END_BATCH();
 372     COMMIT_BATCH();
 373 }
 374
 375 /* start 3d, idle, cb/db flush */
 376 #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
 377
 378 static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims)
 379 {
 380     context_t *context = R700_CONTEXT(ctx);
 381     struct r700_vertex_program *vp = context->selected_vp;
 382     GLboolean flushed;
 383     GLuint dwords, i;
 384     GLuint state_size;
 385     /* pre calculate aos count so state prediction works */
 386     context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);
 387
 388     dwords = PRE_EMIT_STATE_BUFSZ;
 389     if (nr_prims)
 390             dwords += nr_prims * 14;
 391     else {
 392             TNLcontext *tnl = TNL_CONTEXT(ctx);
 393             struct vertex_buffer *vb = &tnl->vb;
 394
 395             for (i = 0; i < vb->PrimitiveCount; i++)
 396                     dwords += vb->Primitive[i].count + 10;
 397     }
 398     state_size = radeonCountStateEmitSize(&context->radeon);
 399     flushed = rcommonEnsureCmdBufSpace(&context->radeon,
 400             dwords + state_size, __FUNCTION__);
 401
 402     if (flushed)
 403         dwords += radeonCountStateEmitSize(&context->radeon);
 404     else
 405         dwords += state_size;
 406
 407     radeon_print(RADEON_RENDER, RADEON_VERBOSE,
 408         "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
 409     return dwords;
 410 }
 411
 412 #define CONVERT( TYPE, MACRO ) do {             \
 413         GLuint i, j, sz;                                \
 414         sz = input->Size;                               \
 415         if (input->Normalized) {                        \
 416                 for (i = 0; i < count; i++) {           \
 417                         const TYPE *in = (TYPE *)src_ptr;               \
 418                         for (j = 0; j < sz; j++) {              \
 419                                 *dst_ptr++ = MACRO(*in);                \
 420                                 in++;                           \
 421                         }                                       \
 422                         src_ptr += stride;                      \
 423                 }                                               \
 424         } else {                                        \
 425                 for (i = 0; i < count; i++) {           \
 426                         const TYPE *in = (TYPE *)src_ptr;               \
 427                         for (j = 0; j < sz; j++) {              \
 428                                 *dst_ptr++ = (GLfloat)(*in);            \
 429                                 in++;                           \
 430                         }                                       \
 431                         src_ptr += stride;                      \
 432                 }                                               \
 433         }                                               \
 434 } while (0)
 435
 436 /**
 437  * Convert attribute data type to float
 438  * If the attribute uses named buffer object replace the bo with newly allocated bo
 439  */
 440 static void r700ConvertAttrib(GLcontext *ctx, int count,
 441                               const struct gl_client_array *input,
 442                               struct StreamDesc *attr)
 443 {
 444     context_t *context = R700_CONTEXT(ctx);
 445     const GLvoid *src_ptr;
 446     GLboolean mapped_named_bo = GL_FALSE;
 447     GLfloat *dst_ptr;
 448     GLuint stride;
 449
 450     stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
 451
 452     /* Convert value for first element only */
 453     if (input->StrideB == 0)
 454     {
 455         count = 1;
 456     }
 457
 458     if (input->BufferObj->Name)
 459     {
 460         if (!input->BufferObj->Pointer)
 461         {
 462             ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
 463             mapped_named_bo = GL_TRUE;
 464         }
 465
 466         src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
 467     }
 468     else
 469     {
 470         src_ptr = input->Ptr;
 471     }
 472
 473     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
 474                          sizeof(GLfloat) * input->Size * count, 32);
 475     dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
 476
 477     assert(src_ptr != NULL);
 478
 479     switch (input->Type)
 480     {
 481         case GL_DOUBLE:
 482             CONVERT(GLdouble, (GLfloat));
 483             break;
 484         case GL_UNSIGNED_INT:
 485             CONVERT(GLuint, UINT_TO_FLOAT);
 486             break;
 487         case GL_INT:
 488             CONVERT(GLint, INT_TO_FLOAT);
 489             break;
 490         case GL_UNSIGNED_SHORT:
 491             CONVERT(GLushort, USHORT_TO_FLOAT);
 492             break;
 493         case GL_SHORT:
 494             CONVERT(GLshort, SHORT_TO_FLOAT);
 495             break;
 496         case GL_UNSIGNED_BYTE:
 497             assert(input->Format != GL_BGRA);
 498             CONVERT(GLubyte, UBYTE_TO_FLOAT);
 499             break;
 500         case GL_BYTE:
 501             CONVERT(GLbyte, BYTE_TO_FLOAT);
 502             break;
 503         default:
 504             assert(0);
 505             break;
 506     }
 507
 508     if (mapped_named_bo)
 509     {
 510         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
 511     }
 512 }
 513
 514 static void r700AlignDataToDword(GLcontext *ctx,
 515                                  const struct gl_client_array *input,
 516                                  int count,
 517                                  struct StreamDesc *attr)
 518 {
 519     context_t *context = R700_CONTEXT(ctx);
 520     const int dst_stride = (input->StrideB + 3) & ~3;
 521     const int size = getTypeSize(input->Type) * input->Size * count;
 522     GLboolean mapped_named_bo = GL_FALSE;
 523
 524     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
 525
 526     if (!input->BufferObj->Pointer)
 527     {
 528         ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
 529         mapped_named_bo = GL_TRUE;
 530     }
 531
 532     {
 533         GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
 534         GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
 535         int i;
 536
 537         for (i = 0; i < count; ++i)
 538         {
 539             _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
 540             src_ptr += input->StrideB;
 541             dst_ptr += dst_stride;
 542         }
 543     }
 544
 545     if (mapped_named_bo)
 546     {
 547         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
 548     }
 549
 550     attr->stride = dst_stride;
 551 }
 552
 553 static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count)
 554 {
 555         context_t *context = R700_CONTEXT(ctx);
 556     GLuint stride;
 557     int ret;
 558     int i, index;
 559
 560     R600_STATECHANGE(context, vtx);
 561
 562     for(index = 0; index < context->nNumActiveAos; index++)
 563     {
 564         struct radeon_aos *aos = &context->radeon.tcl.aos[index];
 565         i = context->stream_desc[index].element;
 566
 567         stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
 568
 569         if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
 570 #if MESA_BIG_ENDIAN
 571             getTypeSize(input[i]->Type) != 4 ||
 572 #endif
 573             stride < 4)
 574         {
 575             r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
 576         }
 577         else
 578         {
 579             if (input[i]->BufferObj->Name)
 580             {
 581                 if (stride % 4 != 0)
 582                 {
 583                     assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
 584                     r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
 585                     context->stream_desc[index].is_named_bo = GL_FALSE;
 586                 }
 587                 else
 588                 {
 589                     context->stream_desc[index].stride = input[i]->StrideB;
 590                     context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
 591                     context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
 592                     context->stream_desc[index].is_named_bo = GL_TRUE;
 593                 }
 594             }
 595             else
 596             {
 597                 int size;
 598                 int local_count = count;
 599                 uint32_t *dst;
 600
 601                 if (input[i]->StrideB == 0)
 602                 {
 603                     size = getTypeSize(input[i]->Type) * input[i]->Size;
 604                     local_count = 1;
 605                 }
 606                 else
 607                 {
 608                     size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
 609                 }
 610
 611                 radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
 612                                      &context->stream_desc[index].bo_offset, size, 32);
 613                 assert(context->stream_desc[index].bo->ptr != NULL);
 614                 dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
 615                                                context->stream_desc[index].bo_offset);
 616
 617                 switch (context->stream_desc[index].dwords)
 618                 {
 619                 case 1:
 620                     radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 621                                         context->stream_desc[index].stride = 4;
 622                     break;
 623                 case 2:
 624                     radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 625                                         context->stream_desc[index].stride = 8;
 626                     break;
 627                 case 3:
 628                     radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 629                                         context->stream_desc[index].stride = 12;
 630                     break;
 631                 case 4:
 632                     radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 633                                         context->stream_desc[index].stride = 16;
 634                     break;
 635                 default:
 636                     assert(0);
 637                     break;
 638                 }
 639             }
 640         }
 641
 642         aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
 643         aos->stride = context->stream_desc[index].stride / sizeof(float);
 644         aos->components = context->stream_desc[index].dwords;
 645         aos->bo = context->stream_desc[index].bo;
 646         aos->offset = context->stream_desc[index].bo_offset;
 647
 648         if(context->stream_desc[index].is_named_bo)
 649         {
 650             radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
 651                                               context->stream_desc[index].bo,
 652                                               RADEON_GEM_DOMAIN_GTT, 0);
 653         }
 654     }
 655
 656     context->radeon.tcl.aos_count = context->nNumActiveAos;
 657     ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
 658                                         first_elem(&context->radeon.dma.reserved)->bo,
 659                                         RADEON_GEM_DOMAIN_GTT, 0);
 660 }
 661
 662 static void r700FreeData(GLcontext *ctx)
 663 {
 664     /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
 665      * to prevent double unref in radeonReleaseArrays
 666      * called during context destroy
 667      */
 668     context_t *context = R700_CONTEXT(ctx);
 669
 670     int i;
 671
 672     for (i = 0; i < context->nNumActiveAos; i++)
 673     {
 674         if (!context->stream_desc[i].is_named_bo)
 675         {
 676                 radeon_bo_unref(context->stream_desc[i].bo);
 677         }
 678         context->radeon.tcl.aos[i].bo = NULL;
 679     }
 680
 681     if (context->ind_buf.bo != NULL)
 682     {
 683             radeon_bo_unref(context->ind_buf.bo);
 684     }
 685 }
 686
 687 static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
 688 {
 689     context_t *context = R700_CONTEXT(ctx);
 690     GLvoid *src_ptr;
 691     GLuint *out;
 692     int i;
 693     GLboolean mapped_named_bo = GL_FALSE;
 694
 695     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
 696     {
 697         ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 698         mapped_named_bo = GL_TRUE;
 699         assert(mesa_ind_buf->obj->Pointer != NULL);
 700     }
 701     src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
 702
 703     if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
 704     {
 705         GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
 706         GLubyte *in = (GLubyte *)src_ptr;
 707
 708         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
 709                              &context->ind_buf.bo_offset, size, 4);
 710
 711         assert(context->ind_buf.bo->ptr != NULL);
 712         out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 713
 714         for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
 715         {
 716             *out++ = in[i] | in[i + 1] << 16;
 717         }
 718
 719         if (i < mesa_ind_buf->count)
 720         {
 721             *out++ = in[i];
 722         }
 723
 724 #if MESA_BIG_ENDIAN
 725     }
 726     else
 727     { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
 728         GLushort *in = (GLushort *)src_ptr;
 729         GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
 730
 731         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
 732                              &context->ind_buf.bo_offset, size, 4);
 733
 734         assert(context->ind_buf.bo->ptr != NULL);
 735         out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 736
 737         for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
 738         {
 739             *out++ = in[i] | in[i + 1] << 16;
 740         }
 741
 742         if (i < mesa_ind_buf->count)
 743         {
 744             *out++ = in[i];
 745         }
 746 #endif
 747     }
 748
 749     context->ind_buf.is_32bit = GL_FALSE;
 750     context->ind_buf.count = mesa_ind_buf->count;
 751
 752     if (mapped_named_bo)
 753     {
 754         ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
 755     }
 756 }
 757
 758 static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
 759 {
 760     context_t *context = R700_CONTEXT(ctx);
 761
 762     if (!mesa_ind_buf) {
 763         context->ind_buf.bo = NULL;
 764         return;
 765     }
 766
 767 #if MESA_BIG_ENDIAN
 768     if (mesa_ind_buf->type == GL_UNSIGNED_INT)
 769     {
 770 #else
 771     if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
 772     {
 773 #endif
 774         const GLvoid *src_ptr;
 775         GLvoid *dst_ptr;
 776         GLboolean mapped_named_bo = GL_FALSE;
 777
 778         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
 779         {
 780                 ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 781                 assert(mesa_ind_buf->obj->Pointer != NULL);
 782                 mapped_named_bo = GL_TRUE;
 783         }
 784
 785         src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
 786
 787         const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
 788
 789         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
 790                              &context->ind_buf.bo_offset, size, 4);
 791         assert(context->ind_buf.bo->ptr != NULL);
 792         dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 793
 794         _mesa_memcpy(dst_ptr, src_ptr, size);
 795
 796         context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
 797         context->ind_buf.count = mesa_ind_buf->count;
 798
 799         if (mapped_named_bo)
 800         {
 801                 ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
 802         }
 803     }
 804     else
 805     {
 806             r700FixupIndexBuffer(ctx, mesa_ind_buf);
 807     }
 808 }
 809
 810 static GLboolean r700TryDrawPrims(GLcontext *ctx,
 811                                   const struct gl_client_array *arrays[],
 812                                   const struct _mesa_prim *prim,
 813                                   GLuint nr_prims,
 814                                   const struct _mesa_index_buffer *ib,
 815                                   GLuint min_index,
 816                                   GLuint max_index )
 817 {
 818     context_t *context = R700_CONTEXT(ctx);
 819     radeonContextPtr radeon = &context->radeon;
 820     GLuint i, id = 0;
 821     struct radeon_renderbuffer *rrb;
 822
 823     if (ctx->NewState)
 824         _mesa_update_state( ctx );
 825
 826     _tnl_UpdateFixedFunctionProgram(ctx);
 827     r700SetVertexFormat(ctx, arrays, max_index + 1);
 828     /* shaders need to be updated before buffers are validated */
 829     r700UpdateShaders2(ctx);
 830     if (!r600ValidateBuffers(ctx))
 831             return GL_FALSE;
 832
 833     /* always emit CB base to prevent
 834      * lock ups on some chips.
 835      */
 836     R600_STATECHANGE(context, cb_target);
 837     /* mark vtx as dirty since it changes per-draw */
 838     R600_STATECHANGE(context, vtx);
 839
 840     r700SetScissor(context);
 841     r700SetupVertexProgram(ctx);
 842     r700SetupFragmentProgram(ctx);
 843     r600UpdateTextureState(ctx);
 844
 845     GLuint emit_end = r700PredictRenderSize(ctx, nr_prims)
 846                     + context->radeon.cmdbuf.cs->cdw;
 847
 848     r700SetupIndexBuffer(ctx, ib);
 849     r700SetupStreams2(ctx, arrays, max_index + 1);
 850
 851     radeonEmitState(radeon);
 852
 853     radeon_debug_add_indent();
 854     for (i = 0; i < nr_prims; ++i)
 855     {
 856             r700RunRenderPrimitive(ctx,
 857                                prim[i].start,
 858                                prim[i].start + prim[i].count,
 859                                prim[i].mode);
 860     }
 861     radeon_debug_remove_indent();
 862
 863     /* Flush render op cached for last several quads. */
 864     r700WaitForIdleClean(context);
 865
 866     rrb = radeon_get_colorbuffer(&context->radeon);
 867     if (rrb && rrb->bo)
 868             r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
 869                          CB_ACTION_ENA_bit | (1 << (id + 6)));
 870
 871     rrb = radeon_get_depthbuffer(&context->radeon);
 872     if (rrb && rrb->bo)
 873             r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
 874                          DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
 875
 876     r700FreeData(ctx);
 877
 878     if (emit_end < context->radeon.cmdbuf.cs->cdw)
 879     {
 880         WARN_ONCE("Rendering was %d commands larger than predicted size."
 881             " We might overflow  command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
 882     }
 883
 884     return GL_TRUE;
 885 }
 886
 887 static void r700DrawPrims(GLcontext *ctx,
 888                           const struct gl_client_array *arrays[],
 889                           const struct _mesa_prim *prim,
 890                           GLuint nr_prims,
 891                           const struct _mesa_index_buffer *ib,
 892                           GLboolean index_bounds_valid,
 893                           GLuint min_index,
 894                           GLuint max_index)
 895 {
 896         GLboolean retval = GL_FALSE;
 897
 898         /* This check should get folded into just the places that
 899          * min/max index are really needed.
 900          */
 901         if (!index_bounds_valid) {
 902                 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
 903         }
 904
 905         if (min_index) {
 906                 vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
 907                 return;
 908         }
 909
 910         /* Make an attempt at drawing */
 911         retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 912
 913         /* If failed run tnl pipeline - it should take care of fallbacks */
 914         if (!retval)
 915                 _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 916 }
 917
 918 void r700InitDraw(GLcontext *ctx)
 919 {
 920         struct vbo_context *vbo = vbo_context(ctx);
 921
 922         /* to be enabled */
 923         vbo->draw_prims = r700DrawPrims;
 924 }
 925
 926