src/mesa/drivers/dri/r600/r700_render.c

   1 /*
   2  * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included
  12  * in all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  */
  21
  22 /*
  23  * Authors:
  24  *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
  25  *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
  26  */
  27
  28 #include "main/glheader.h"
  29 #include "main/state.h"
  30 #include "main/imports.h"
  31 #include "main/enums.h"
  32 #include "main/macros.h"
  33 #include "main/context.h"
  34 #include "main/dd.h"
  35 #include "main/simple_list.h"
  36 #include "main/api_arrayelt.h"
  37 #include "swrast/swrast.h"
  38 #include "swrast_setup/swrast_setup.h"
  39 #include "vbo/vbo.h"
  40
  41 #include "tnl/tnl.h"
  42 #include "tnl/t_vp_build.h"
  43 #include "tnl/t_context.h"
  44 #include "tnl/t_vertex.h"
  45 #include "vbo/vbo_context.h"
  46
  47 #include "r600_context.h"
  48 #include "r600_cmdbuf.h"
  49
  50 #include "r600_tex.h"
  51
  52 #include "r700_vertprog.h"
  53 #include "r700_fragprog.h"
  54 #include "r700_state.h"
  55
  56 #include "radeon_buffer_objects.h"
  57 #include "radeon_common_context.h"
  58
  59 void r700WaitForIdle(context_t *context);
  60 void r700WaitForIdleClean(context_t *context);
  61 static unsigned int r700PrimitiveType(int prim);
  62 GLboolean r700SyncSurf(context_t *context,
  63                        struct radeon_bo *pbo,
  64                        uint32_t read_domain,
  65                        uint32_t write_domain,
  66                        uint32_t sync_type);
  67
  68 void r700WaitForIdle(context_t *context)
  69 {
  70     BATCH_LOCALS(&context->radeon);
  71     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
  72     BEGIN_BATCH_NO_AUTOSTATE(3);
  73
  74     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
  75     R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
  76     R600_OUT_BATCH(WAIT_3D_IDLE_bit);
  77
  78     END_BATCH();
  79     COMMIT_BATCH();
  80 }
  81
  82 void r700WaitForIdleClean(context_t *context)
  83 {
  84     BATCH_LOCALS(&context->radeon);
  85     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
  86     BEGIN_BATCH_NO_AUTOSTATE(5);
  87
  88     R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
  89     R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
  90
  91     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
  92     R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
  93     R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
  94
  95     END_BATCH();
  96     COMMIT_BATCH();
  97 }
  98
  99 void r700Start3D(context_t *context)
 100 {
 101     BATCH_LOCALS(&context->radeon);
 102     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
 103     if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
 104     {
 105         BEGIN_BATCH_NO_AUTOSTATE(2);
 106         R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
 107         R600_OUT_BATCH(0);
 108         END_BATCH();
 109     }
 110
 111     BEGIN_BATCH_NO_AUTOSTATE(3);
 112     R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
 113     R600_OUT_BATCH(0x80000000);
 114     R600_OUT_BATCH(0x80000000);
 115     END_BATCH();
 116
 117     COMMIT_BATCH();
 118 }
 119
 120 GLboolean r700SyncSurf(context_t *context,
 121                        struct radeon_bo *pbo,
 122                        uint32_t read_domain,
 123                        uint32_t write_domain,
 124                        uint32_t sync_type)
 125 {
 126     BATCH_LOCALS(&context->radeon);
 127     radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
 128     uint32_t cp_coher_size;
 129
 130     if (!pbo)
 131             return GL_FALSE;
 132
 133     if (pbo->size == 0xffffffff)
 134             cp_coher_size = 0xffffffff;
 135     else
 136             cp_coher_size = ((pbo->size + 255) >> 8);
 137
 138     BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
 139     R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
 140     R600_OUT_BATCH(sync_type);
 141     R600_OUT_BATCH(cp_coher_size);
 142     R600_OUT_BATCH(0);
 143     R600_OUT_BATCH(10);
 144     R600_OUT_BATCH_RELOC(0,
 145                          pbo,
 146                          0,
 147                          read_domain, write_domain, 0);
 148     END_BATCH();
 149     COMMIT_BATCH();
 150
 151     return GL_TRUE;
 152 }
 153
 154 static unsigned int r700PrimitiveType(int prim)
 155 {
 156     switch (prim & PRIM_MODE_MASK)
 157     {
 158     case GL_POINTS:
 159         return DI_PT_POINTLIST;
 160         break;
 161     case GL_LINES:
 162         return DI_PT_LINELIST;
 163         break;
 164     case GL_LINE_STRIP:
 165         return DI_PT_LINESTRIP;
 166         break;
 167     case GL_LINE_LOOP:
 168         return DI_PT_LINELOOP;
 169         break;
 170     case GL_TRIANGLES:
 171         return DI_PT_TRILIST;
 172         break;
 173     case GL_TRIANGLE_STRIP:
 174         return DI_PT_TRISTRIP;
 175         break;
 176     case GL_TRIANGLE_FAN:
 177         return DI_PT_TRIFAN;
 178         break;
 179     case GL_QUADS:
 180         return DI_PT_QUADLIST;
 181         break;
 182     case GL_QUAD_STRIP:
 183         return DI_PT_QUADSTRIP;
 184         break;
 185     case GL_POLYGON:
 186         return DI_PT_POLYGON;
 187         break;
 188     default:
 189         assert(0);
 190         return -1;
 191         break;
 192     }
 193 }
 194
 195 static int r700NumVerts(int num_verts, int prim)
 196 {
 197         int verts_off = 0;
 198
 199         switch (prim & PRIM_MODE_MASK) {
 200         case GL_POINTS:
 201                 verts_off = 0;
 202                 break;
 203         case GL_LINES:
 204                 verts_off = num_verts % 2;
 205                 break;
 206         case GL_LINE_STRIP:
 207                 if (num_verts < 2)
 208                         verts_off = num_verts;
 209                 break;
 210         case GL_LINE_LOOP:
 211                 if (num_verts < 2)
 212                         verts_off = num_verts;
 213                 break;
 214         case GL_TRIANGLES:
 215                 verts_off = num_verts % 3;
 216                 break;
 217         case GL_TRIANGLE_STRIP:
 218                 if (num_verts < 3)
 219                         verts_off = num_verts;
 220                 break;
 221         case GL_TRIANGLE_FAN:
 222                 if (num_verts < 3)
 223                         verts_off = num_verts;
 224                 break;
 225         case GL_QUADS:
 226                 verts_off = num_verts % 4;
 227                 break;
 228         case GL_QUAD_STRIP:
 229                 if (num_verts < 4)
 230                         verts_off = num_verts;
 231                 else
 232                         verts_off = num_verts % 2;
 233                 break;
 234         case GL_POLYGON:
 235                 if (num_verts < 3)
 236                         verts_off = num_verts;
 237                 break;
 238         default:
 239                 assert(0);
 240                 return -1;
 241                 break;
 242         }
 243
 244         return num_verts - verts_off;
 245 }
 246
 247 static void r700RunRenderPrimitive(struct gl_context * ctx, int start, int end,
 248                                    int prim, GLint basevertex)
 249 {
 250     context_t *context = R700_CONTEXT(ctx);
 251     BATCH_LOCALS(&context->radeon);
 252     int type, total_emit;
 253     int num_indices;
 254     uint32_t vgt_draw_initiator = 0;
 255     uint32_t vgt_index_type     = 0;
 256     uint32_t vgt_primitive_type = 0;
 257     uint32_t vgt_num_indices    = 0;
 258
 259     type = r700PrimitiveType(prim);
 260     num_indices = r700NumVerts(end - start, prim);
 261
 262     radeon_print(RADEON_RENDER, RADEON_TRACE,
 263                  "%s type %x num_indices %d\n",
 264                  __func__, type, num_indices);
 265
 266     if (type < 0 || num_indices <= 0)
 267             return;
 268
 269     SETfield(vgt_primitive_type, type,
 270              VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
 271
 272     SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 273
 274     if(GL_TRUE != context->ind_buf.is_32bit)
 275     {
 276             SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 277     }
 278
 279         /* 16-bit indexes are packed in a 32-bit value */
 280         SETfield(vgt_index_type,
 281 #if MESA_BIG_ENDIAN
 282                         VGT_DMA_SWAP_32_BIT,
 283 #else
 284                         VGT_DMA_SWAP_NONE,
 285 #endif
 286                         SWAP_MODE_shift, SWAP_MODE_mask);
 287
 288
 289     vgt_num_indices = num_indices;
 290     SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
 291     SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
 292
 293     total_emit =   3  /* VGT_PRIMITIVE_TYPE */
 294                  + 2  /* VGT_INDEX_TYPE */
 295                  + 2  /* NUM_INSTANCES */
 296                  + 4  /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
 297                  + 5 + 2; /* DRAW_INDEX */
 298
 299     BEGIN_BATCH_NO_AUTOSTATE(total_emit);
 300     // prim
 301     R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
 302     R600_OUT_BATCH(vgt_primitive_type);
 303     // index type
 304     R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
 305     R600_OUT_BATCH(vgt_index_type);
 306     // num instances
 307     R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
 308     R600_OUT_BATCH(1);
 309     /* offset */
 310     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
 311     R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
 312     R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC
 313     R600_OUT_BATCH(0);          //VTX_START_INST_LOC
 314     // draw packet
 315     R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
 316     R600_OUT_BATCH(context->ind_buf.bo_offset);
 317     R600_OUT_BATCH(0);
 318     R600_OUT_BATCH(vgt_num_indices);
 319     R600_OUT_BATCH(vgt_draw_initiator);
 320     R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
 321                          context->ind_buf.bo,
 322                          context->ind_buf.bo_offset,
 323                          RADEON_GEM_DOMAIN_GTT, 0, 0);
 324     END_BATCH();
 325     COMMIT_BATCH();
 326 }
 327
 328 static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, int end, int prim)
 329 {
 330     context_t *context = R700_CONTEXT(ctx);
 331     BATCH_LOCALS(&context->radeon);
 332     int type;
 333     uint32_t num_indices, total_emit = 0;
 334     uint32_t vgt_draw_initiator = 0;
 335     uint32_t vgt_index_type     = 0;
 336     uint32_t vgt_primitive_type = 0;
 337     uint32_t vgt_num_indices    = 0;
 338
 339     type = r700PrimitiveType(prim);
 340     num_indices = r700NumVerts(end - start, prim);
 341
 342     radeon_print(RADEON_RENDER, RADEON_TRACE,
 343                  "%s type %x num_indices %d\n",
 344                  __func__, type, num_indices);
 345
 346     if (type < 0 || num_indices <= 0)
 347             return;
 348
 349     SETfield(vgt_primitive_type, type,
 350              VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
 351
 352     if (num_indices > 0xffff)
 353     {
 354             SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 355     }
 356     else
 357     {
 358             SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 359     }
 360
 361         /* 16-bit indexes are packed in a 32-bit value */
 362         SETfield(vgt_index_type,
 363 #if MESA_BIG_ENDIAN
 364                         VGT_DMA_SWAP_32_BIT,
 365 #else
 366                         VGT_DMA_SWAP_NONE,
 367 #endif
 368                         SWAP_MODE_shift, SWAP_MODE_mask);
 369
 370     vgt_num_indices = num_indices;
 371     SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
 372
 373     SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
 374
 375     total_emit +=   3 /* VGT_PRIMITIVE_TYPE */
 376                   + 2 /* VGT_INDEX_TYPE */
 377                   + 2 /* NUM_INSTANCES */
 378                   + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
 379                   + 3; /* DRAW */
 380
 381     BEGIN_BATCH_NO_AUTOSTATE(total_emit);
 382     // prim
 383     R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
 384     R600_OUT_BATCH(vgt_primitive_type);
 385     // index type
 386     R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
 387     R600_OUT_BATCH(vgt_index_type);
 388     // num instances
 389     R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
 390     R600_OUT_BATCH(1);
 391     /* offset */
 392     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
 393     R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
 394     R600_OUT_BATCH(start); //VTX_BASE_VTX_LOC
 395     R600_OUT_BATCH(0); //VTX_START_INST_LOC
 396     // draw packet
 397
 398     R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
 399     R600_OUT_BATCH(vgt_num_indices);
 400     R600_OUT_BATCH(vgt_draw_initiator);
 401
 402     END_BATCH();
 403     COMMIT_BATCH();
 404 }
 405
 406 /* start 3d, idle, cb/db flush */
 407 #define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14
 408
 409 static GLuint r700PredictRenderSize(struct gl_context* ctx,
 410                                     const struct _mesa_prim *prim,
 411                                     const struct _mesa_index_buffer *ib,
 412                                     GLuint nr_prims)
 413 {
 414     context_t *context = R700_CONTEXT(ctx);
 415     GLboolean flushed;
 416     GLuint dwords, i;
 417     GLuint state_size;
 418
 419     dwords = PRE_EMIT_STATE_BUFSZ;
 420     if (ib)
 421             dwords += nr_prims * 18;
 422     else {
 423             for (i = 0; i < nr_prims; ++i)
 424             {
 425                 dwords += 14;
 426             }
 427     }
 428
 429     state_size = radeonCountStateEmitSize(&context->radeon);
 430     flushed = rcommonEnsureCmdBufSpace(&context->radeon,
 431                                        dwords + state_size,
 432                                        __FUNCTION__);
 433     if (flushed)
 434             dwords += radeonCountStateEmitSize(&context->radeon);
 435     else
 436             dwords += state_size;
 437
 438     radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
 439     return dwords;
 440
 441 }
 442
 443 #define CONVERT( TYPE, MACRO ) do {             \
 444         GLuint i, j, sz;                                \
 445         sz = input->Size;                               \
 446         if (input->Normalized) {                        \
 447                 for (i = 0; i < count; i++) {           \
 448                         const TYPE *in = (TYPE *)src_ptr;               \
 449                         for (j = 0; j < sz; j++) {              \
 450                                 *dst_ptr++ = MACRO(*in);                \
 451                                 in++;                           \
 452                         }                                       \
 453                         src_ptr += stride;                      \
 454                 }                                               \
 455         } else {                                        \
 456                 for (i = 0; i < count; i++) {           \
 457                         const TYPE *in = (TYPE *)src_ptr;               \
 458                         for (j = 0; j < sz; j++) {              \
 459                                 *dst_ptr++ = (GLfloat)(*in);            \
 460                                 in++;                           \
 461                         }                                       \
 462                         src_ptr += stride;                      \
 463                 }                                               \
 464         }                                               \
 465 } while (0)
 466
 467 /**
 468  * Convert attribute data type to float
 469  * If the attribute uses named buffer object replace the bo with newly allocated bo
 470  */
 471 static void r700ConvertAttrib(struct gl_context *ctx, int count,
 472                               const struct gl_client_array *input,
 473                               struct StreamDesc *attr)
 474 {
 475     context_t *context = R700_CONTEXT(ctx);
 476     const GLvoid *src_ptr;
 477     GLboolean mapped_named_bo = GL_FALSE;
 478     GLfloat *dst_ptr;
 479     GLuint stride;
 480
 481     stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
 482
 483     /* Convert value for first element only */
 484     if (input->StrideB == 0)
 485     {
 486         count = 1;
 487     }
 488
 489     if (input->BufferObj->Name)
 490     {
 491         if (!input->BufferObj->Pointer)
 492         {
 493             ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
 494             mapped_named_bo = GL_TRUE;
 495         }
 496
 497         src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
 498     }
 499     else
 500     {
 501         src_ptr = input->Ptr;
 502     }
 503
 504     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
 505                          sizeof(GLfloat) * input->Size * count, 32);
 506
 507     radeon_bo_map(attr->bo, 1);
 508
 509     dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
 510
 511     assert(src_ptr != NULL);
 512
 513     switch (input->Type)
 514     {
 515         case GL_DOUBLE:
 516             CONVERT(GLdouble, (GLfloat));
 517             break;
 518         case GL_UNSIGNED_INT:
 519             CONVERT(GLuint, UINT_TO_FLOAT);
 520             break;
 521         case GL_INT:
 522             CONVERT(GLint, INT_TO_FLOAT);
 523             break;
 524         case GL_UNSIGNED_SHORT:
 525             CONVERT(GLushort, USHORT_TO_FLOAT);
 526             break;
 527         case GL_SHORT:
 528             CONVERT(GLshort, SHORT_TO_FLOAT);
 529             break;
 530         case GL_UNSIGNED_BYTE:
 531             assert(input->Format != GL_BGRA);
 532             CONVERT(GLubyte, UBYTE_TO_FLOAT);
 533             break;
 534         case GL_BYTE:
 535             CONVERT(GLbyte, BYTE_TO_FLOAT);
 536             break;
 537         default:
 538             assert(0);
 539             break;
 540     }
 541
 542     radeon_bo_unmap(attr->bo);
 543
 544     if (mapped_named_bo)
 545     {
 546         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
 547     }
 548 }
 549
 550 #if 0 /* unused */
 551 static void r700AlignDataToDword(struct gl_context *ctx,
 552                                  const struct gl_client_array *input,
 553                                  int count,
 554                                  struct StreamDesc *attr)
 555 {
 556     context_t *context = R700_CONTEXT(ctx);
 557     const int dst_stride = (input->StrideB + 3) & ~3;
 558     const int size = getTypeSize(input->Type) * input->Size * count;
 559     GLboolean mapped_named_bo = GL_FALSE;
 560
 561     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
 562
 563     radeon_bo_map(attr->bo, 1);
 564
 565     if (!input->BufferObj->Pointer)
 566     {
 567         ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
 568         mapped_named_bo = GL_TRUE;
 569     }
 570
 571     {
 572         GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
 573         GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
 574         int i;
 575
 576         for (i = 0; i < count; ++i)
 577         {
 578             memcpy(dst_ptr, src_ptr, input->StrideB);
 579             src_ptr += input->StrideB;
 580             dst_ptr += dst_stride;
 581         }
 582     }
 583
 584     radeon_bo_unmap(attr->bo);
 585     if (mapped_named_bo)
 586     {
 587         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
 588     }
 589
 590     attr->stride = dst_stride;
 591 }
 592 #endif
 593
 594 static void r700SetupStreams(struct gl_context *ctx, const struct gl_client_array *input[], int count)
 595 {
 596         context_t *context = R700_CONTEXT(ctx);
 597     GLuint stride;
 598     int ret;
 599     int i, index;
 600
 601     R600_STATECHANGE(context, vtx);
 602
 603     for(index = 0; index < context->nNumActiveAos; index++)
 604     {
 605         struct radeon_aos *aos = &context->radeon.tcl.aos[index];
 606         i = context->stream_desc[index].element;
 607
 608         stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
 609
 610         if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT
 611 #if MESA_BIG_ENDIAN
 612             || getTypeSize(input[i]->Type) != 4
 613 #endif
 614             )
 615         {
 616             assert(count);
 617             r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
 618         }
 619         else
 620         {
 621             if (input[i]->BufferObj->Name)
 622             {
 623                 context->stream_desc[index].stride = input[i]->StrideB;
 624                 context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
 625                 context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
 626                 context->stream_desc[index].is_named_bo = GL_TRUE;
 627             }
 628             else
 629             {
 630                 int size;
 631                 int local_count = count;
 632                 uint32_t *dst;
 633
 634                 if (input[i]->StrideB == 0)
 635                 {
 636                     size = getTypeSize(input[i]->Type) * input[i]->Size;
 637                     local_count = 1;
 638                 }
 639                 else
 640                 {
 641                     size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
 642                 }
 643
 644                 radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
 645                                      &context->stream_desc[index].bo_offset, size, 32);
 646
 647                 radeon_bo_map(context->stream_desc[index].bo, 1);
 648                 assert(context->stream_desc[index].bo->ptr != NULL);
 649
 650
 651                 dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
 652                                                context->stream_desc[index].bo_offset);
 653
 654                 switch (context->stream_desc[index].dwords)
 655                 {
 656                 case 1:
 657                     radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 658                     break;
 659                 case 2:
 660                     radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 661                     break;
 662                 case 3:
 663                     radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 664                     break;
 665                 case 4:
 666                     radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
 667                     break;
 668                 default:
 669                     assert(0);
 670                     break;
 671                 }
 672                 radeon_bo_unmap(context->stream_desc[index].bo);
 673             }
 674         }
 675
 676         aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
 677         aos->stride = context->stream_desc[index].stride / sizeof(float);
 678         aos->components = context->stream_desc[index].dwords;
 679         aos->bo = context->stream_desc[index].bo;
 680         aos->offset = context->stream_desc[index].bo_offset;
 681
 682         if(context->stream_desc[index].is_named_bo)
 683         {
 684             radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
 685                                               context->stream_desc[index].bo,
 686                                               RADEON_GEM_DOMAIN_GTT, 0);
 687         }
 688     }
 689
 690     ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
 691                                         first_elem(&context->radeon.dma.reserved)->bo,
 692                                         RADEON_GEM_DOMAIN_GTT, 0);
 693 }
 694
 695 static void r700FreeData(struct gl_context *ctx)
 696 {
 697     /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
 698      * to prevent double unref in radeonReleaseArrays
 699      * called during context destroy
 700      */
 701     context_t *context = R700_CONTEXT(ctx);
 702
 703     int i;
 704
 705     for (i = 0; i < context->nNumActiveAos; i++)
 706     {
 707         if (!context->stream_desc[i].is_named_bo)
 708         {
 709                 radeon_bo_unref(context->stream_desc[i].bo);
 710         }
 711         context->radeon.tcl.aos[i].bo = NULL;
 712     }
 713
 714     if (context->ind_buf.bo != NULL)
 715     {
 716             radeon_bo_unref(context->ind_buf.bo);
 717     }
 718 }
 719
 720 static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
 721 {
 722     context_t *context = R700_CONTEXT(ctx);
 723     GLvoid *src_ptr;
 724     GLuint *out;
 725     int i;
 726     GLboolean mapped_named_bo = GL_FALSE;
 727
 728     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
 729     {
 730         ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 731         mapped_named_bo = GL_TRUE;
 732         assert(mesa_ind_buf->obj->Pointer != NULL);
 733     }
 734     src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
 735
 736     if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
 737     {
 738         GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
 739         GLubyte *in = (GLubyte *)src_ptr;
 740
 741         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
 742                              &context->ind_buf.bo_offset, size, 4);
 743
 744         radeon_bo_map(context->ind_buf.bo, 1);
 745         assert(context->ind_buf.bo->ptr != NULL);
 746         out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 747
 748         for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
 749         {
 750             *out++ = in[i] | in[i + 1] << 16;
 751         }
 752
 753         if (i < mesa_ind_buf->count)
 754         {
 755             *out++ = in[i];
 756         }
 757
 758         radeon_bo_unmap(context->ind_buf.bo);
 759 #if MESA_BIG_ENDIAN
 760     }
 761     else
 762     { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
 763         GLushort *in = (GLushort *)src_ptr;
 764         GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
 765
 766         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
 767                              &context->ind_buf.bo_offset, size, 4);
 768
 769         radeon_bo_map(context->ind_buf.bo, 1);
 770         assert(context->ind_buf.bo->ptr != NULL);
 771         out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 772
 773         for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
 774         {
 775             *out++ = in[i] | in[i + 1] << 16;
 776         }
 777
 778         if (i < mesa_ind_buf->count)
 779         {
 780             *out++ = in[i];
 781         }
 782         radeon_bo_unmap(context->ind_buf.bo);
 783 #endif
 784     }
 785
 786     context->ind_buf.is_32bit = GL_FALSE;
 787     context->ind_buf.count = mesa_ind_buf->count;
 788
 789     if (mapped_named_bo)
 790     {
 791         ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
 792     }
 793 }
 794
 795 static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
 796 {
 797     context_t *context = R700_CONTEXT(ctx);
 798
 799     if (!mesa_ind_buf) {
 800         context->ind_buf.bo = NULL;
 801         return;
 802     }
 803
 804 #if MESA_BIG_ENDIAN
 805     if (mesa_ind_buf->type == GL_UNSIGNED_INT)
 806 #else
 807     if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
 808 #endif
 809     {
 810         const GLvoid *src_ptr;
 811         GLvoid *dst_ptr;
 812         GLboolean mapped_named_bo = GL_FALSE;
 813
 814         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
 815         {
 816                 ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 817                 assert(mesa_ind_buf->obj->Pointer != NULL);
 818                 mapped_named_bo = GL_TRUE;
 819         }
 820
 821         src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
 822
 823         const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
 824
 825         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
 826                              &context->ind_buf.bo_offset, size, 4);
 827         radeon_bo_map(context->ind_buf.bo, 1);
 828         assert(context->ind_buf.bo->ptr != NULL);
 829         dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 830
 831         memcpy(dst_ptr, src_ptr, size);
 832
 833         radeon_bo_unmap(context->ind_buf.bo);
 834         context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
 835         context->ind_buf.count = mesa_ind_buf->count;
 836
 837         if (mapped_named_bo)
 838         {
 839                 ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
 840         }
 841     }
 842     else
 843     {
 844             r700FixupIndexBuffer(ctx, mesa_ind_buf);
 845     }
 846 }
 847
 848 static GLboolean check_fallbacks(struct gl_context *ctx)
 849 {
 850         if (ctx->RenderMode != GL_RENDER)
 851                 return GL_TRUE;
 852
 853         return GL_FALSE;
 854 }
 855
 856 static GLboolean r700TryDrawPrims(struct gl_context *ctx,
 857                                   const struct gl_client_array *arrays[],
 858                                   const struct _mesa_prim *prim,
 859                                   GLuint nr_prims,
 860                                   const struct _mesa_index_buffer *ib,
 861                                   GLuint min_index,
 862                                   GLuint max_index )
 863 {
 864     context_t *context = R700_CONTEXT(ctx);
 865     radeonContextPtr radeon = &context->radeon;
 866     GLuint i, id = 0;
 867     struct radeon_renderbuffer *rrb;
 868
 869     if (ctx->NewState)
 870         _mesa_update_state( ctx );
 871
 872     if (check_fallbacks(ctx))
 873             return GL_FALSE;
 874
 875     _tnl_UpdateFixedFunctionProgram(ctx);
 876     r700SetVertexFormat(ctx, arrays, max_index + 1);
 877     /* shaders need to be updated before buffers are validated */
 878     r700UpdateShaders(ctx);
 879     if (!r600ValidateBuffers(ctx))
 880             return GL_FALSE;
 881
 882     /* always emit CB base to prevent
 883      * lock ups on some chips.
 884      */
 885     R600_STATECHANGE(context, cb_target);
 886     /* mark vtx as dirty since it changes per-draw */
 887     R600_STATECHANGE(context, vtx);
 888
 889     r700SetScissor(context);
 890     r700SetupVertexProgram(ctx);
 891     r700SetupFragmentProgram(ctx);
 892     r700UpdateShaderStates(ctx);
 893
 894     GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
 895                     + context->radeon.cmdbuf.cs->cdw;
 896
 897     r700SetupIndexBuffer(ctx, ib);
 898     r700SetupStreams(ctx, arrays, max_index + 1);
 899
 900     radeonEmitState(radeon);
 901
 902     radeon_debug_add_indent();
 903     for (i = 0; i < nr_prims; ++i)
 904     {
 905             if (context->ind_buf.bo)
 906                     r700RunRenderPrimitive(ctx,
 907                                            prim[i].start,
 908                                            prim[i].start + prim[i].count,
 909                                            prim[i].mode,
 910                                            prim[i].basevertex);
 911             else
 912                     r700RunRenderPrimitiveImmediate(ctx,
 913                                                     prim[i].start,
 914                                                     prim[i].start + prim[i].count,
 915                                                     prim[i].mode);
 916     }
 917     radeon_debug_remove_indent();
 918
 919     /* Flush render op cached for last several quads. */
 920     /* XXX drm should handle this in fence submit */
 921     r700WaitForIdleClean(context);
 922
 923     rrb = radeon_get_colorbuffer(&context->radeon);
 924     if (rrb && rrb->bo)
 925             r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
 926                          CB_ACTION_ENA_bit | (1 << (id + 6)));
 927
 928     rrb = radeon_get_depthbuffer(&context->radeon);
 929     if (rrb && rrb->bo)
 930             r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
 931                          DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
 932
 933     r700FreeData(ctx);
 934
 935     if (emit_end < context->radeon.cmdbuf.cs->cdw)
 936     {
 937         WARN_ONCE("Rendering was %d commands larger than predicted size."
 938             " We might overflow  command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
 939     }
 940
 941     return GL_TRUE;
 942 }
 943
 944 static void r700DrawPrims(struct gl_context *ctx,
 945                           const struct gl_client_array *arrays[],
 946                           const struct _mesa_prim *prim,
 947                           GLuint nr_prims,
 948                           const struct _mesa_index_buffer *ib,
 949                           GLboolean index_bounds_valid,
 950                           GLuint min_index,
 951                           GLuint max_index)
 952 {
 953         GLboolean retval = GL_FALSE;
 954
 955         context_t *context = R700_CONTEXT(ctx);
 956         radeonContextPtr radeon = &context->radeon;
 957         radeon_prepare_render(radeon);
 958
 959         /* This check should get folded into just the places that
 960          * min/max index are really needed.
 961          */
 962
 963         if (!vbo_all_varyings_in_vbos(arrays)) {
 964             if (!index_bounds_valid)
 965                 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
 966             /* do we want to rebase, minimizes the
 967              * amount of data to upload? */
 968             if (min_index) {
 969                 vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
 970                 return;
 971             }
 972         }
 973         /* Make an attempt at drawing */
 974         retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 975
 976         /* If failed run tnl pipeline - it should take care of fallbacks */
 977         if (!retval) {
 978                 _swsetup_Wakeup(ctx);
 979                 _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 980         }
 981 }
 982
 983 void r700InitDraw(struct gl_context *ctx)
 984 {
 985         struct vbo_context *vbo = vbo_context(ctx);
 986
 987         /* to be enabled */
 988         vbo->draw_prims = r700DrawPrims;
 989 }
 990
 991