[mesa.git] src/gallium/drivers/nvfx/nvfx_vbo.c
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

#include "nouveau/nouveau_channel.h"

#include "nouveau/nouveau_pushbuf.h"

static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
    /* Euler's formula gives V =
     * = E - F + 2 =
     * = F * (polygon_edges / 2 - 1) + 2 =
     * = F * (polygon_edges - 2) / 2 + 2 =
     * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
     * = indices * (1 / 2 - 1 / polygon_edges) + 2
     */
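    /* Sanity check of the estimate on a closed cube drawn as indexed triangles
     * (illustrative): 12 triangles give 36 indices, and
     * 36 * (1/2 - 1/3) + 2 = 6 + 2 = 8, matching the cube's 8 unique vertices.
     */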
    switch(mode)
    {
    case PIPE_PRIM_LINES:
        return indices >> 1;
    case PIPE_PRIM_TRIANGLES:
    {
        // avoid an expensive division by 3 using the multiplicative inverse mod 2^32
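        // (2863311531 is 0xAAAAAAAB, and 3 * 2863311531 = 2 * 2^32 + 1, i.e. 1 mod 2^32,
        // so multiplying by it undoes a multiplication by 3 whenever the value is a
        // multiple of 3; for the other residues the product wraps around to a huge
        // value, which the q >= indices checks below detect and correct)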
        unsigned q;
        unsigned inv3 = 2863311531;
        indices >>= 1;
        q = indices * inv3;
        if(unlikely(q >= indices))
        {
            q += inv3;
            if(q >= indices)
                q += inv3;
        }
        return q + 2;
        //return indices / 6 + 2;
    }
    // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
    case PIPE_PRIM_QUADS:
        return (indices >> 1) + 2;
        // return (indices >> 2) + 2; // if it is a closed mesh
    default:
        return indices;
    }
}

static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context* nvfx = nvfx_context(pipe);
    unsigned hardware_cost = 0;
    unsigned inline_cost = 0;
    unsigned unique_vertices;
    unsigned upload_mode;
    float best_index_cost_for_hardware_vertices_as_inline_cost;
    boolean prefer_hardware_indices;
    unsigned index_inline_cost;
    unsigned index_hardware_cost;

    if (info->indexed)
        unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
    else
        unique_vertices = info->count;

    /* Here we try to figure out whether we are better off writing vertex data directly
     * to the FIFO, or creating hardware buffer objects and pointing the hardware at them.
     *
     * This is done by computing the total memcpy cost of each option, ignoring uploads
     * if we think that the buffer is static and thus the upload cost will be amortized
     * over future draw calls.
     *
     * For instance, if everything looks static, we will always create buffer objects,
     * while if everything is a user buffer and we are not doing indexed drawing, we
     * never do.
     *
     * Another interesting case is a small user vertex buffer combined with a huge user
     * index buffer: there we upload the vertex buffer so that we can use hardware index
     * lookup. In the opposite case we instead do index lookup in software, to avoid
     * uploading a huge amount of vertex data that is not going to be used.
     *
     * Otherwise, we generally move a buffer to the GPU after it has been pushed
     * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having been updated
     * with a transfer (or destroyed) in the meantime.
     *
     * There is no special handling for user buffers, since applications can use OpenGL
     * VBOs in a one-shot fashion anyway; the OpenGL 3/4 core profiles in fact force the
     * use of VBOs.
     *
     * Note that currently we don't support putting only some of the data on the FIFO
     * and the rest in vertex buffers (constant and instanced data is independent of
     * this).
     *
     * nVidia doesn't seem to do this either, even though it should be at least doable
     * with VTX_ATTR, and possibly with VERTEX_DATA too if not indexed.
     */

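    /* In short: upload_mode 0 means inline everything, 1 means hardware vertex buffers
     * with inline indices, and 2 means hardware vertex and index buffers. The decision
     * below roughly compares
     *   inline_cost + index_inline_cost
     * against
     *   hardware_cost * inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost.
     *
     * The loop below charges, for each vertex buffer referenced per-vertex, either the
     * inline copy of the vertices used by this draw or the upload of its dirty range
     * (plus an allocation cost if it has no BO yet), and also decrements
     * bytes_to_draw_until_static, which is presumably what nvfx_buffer_seems_static
     * looks at to decide when a buffer counts as static.
     */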
    for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
    {
        struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
        if (!nvfx_buffer_seems_static(buffer))
        {
            hardware_cost += buffer->dirty_end - buffer->dirty_begin;
            if (!buffer->base.bo)
                hardware_cost += nvfx->screen->buffer_allocation_cost;
        }
        inline_cost += vbi->per_vertex_size * info->count;
    }

    best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
    prefer_hardware_indices = FALSE;
    index_inline_cost = 0;
    index_hardware_cost = 0;

    if (info->indexed)
    {
        index_inline_cost = nvfx->idxbuf.index_size * info->count;
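        /* hardware index buffers apparently need 16- or 32-bit indices and an offset
         * aligned to the index size, and index_buffer_reloc_flags being zero means the
         * screen cannot use them at all; otherwise we must stay with inline indices
         */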
        if (nvfx->screen->index_buffer_reloc_flags
                && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
                && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
        {
            struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
            buffer->bytes_to_draw_until_static -= index_inline_cost;

            prefer_hardware_indices = TRUE;

            if (!nvfx_buffer_seems_static(buffer))
            {
                index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
                if (!buffer->base.bo)
                    index_hardware_cost += nvfx->screen->buffer_allocation_cost;
            }

            if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
            }
            else
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
                prefer_hardware_indices = TRUE;
            }
        }
    }

    /* let's finally figure out which of the 3 paths we want to take */
    if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
        upload_mode = 1 + prefer_hardware_indices;
    else
        upload_mode = 0;

#ifdef DEBUG
    if (unlikely(nvfx->screen->trace_draw))
    {
        fprintf(stderr, "DRAW");
        if (info->indexed)
        {
            fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
            if (info->index_bias)
                fprintf(stderr, " biased %u", info->index_bias);
            fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
        }
        if (info->instance_count > 1)
            fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
        fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
        if (!upload_mode)
            fprintf(stderr, " -> inline vertex data");
        else if (upload_mode == 2 || !info->indexed)
            fprintf(stderr, " -> buffer range");
        else
            fprintf(stderr, " -> inline indices");
        fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
            if (i)
                fprintf(stderr, ", ");
            fprintf(stderr, "%p%s left %lli", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
        }
        fprintf(stderr, ">\n");
    }
#endif

    return upload_mode;
}

void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);
    unsigned upload_mode = 0;

    if (!nvfx->vtxelt->needs_translate)
        upload_mode = nvfx_decide_upload_mode(pipe, info);

    nvfx->use_index_buffer = upload_mode > 1;

    if ((upload_mode > 0) != nvfx->use_vertex_buffers)
    {
        nvfx->use_vertex_buffers = (upload_mode > 0);
        nvfx->dirty |= NVFX_NEW_ARRAYS;
        nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
    }

    if (upload_mode > 0)
    {
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            nvfx_buffer_upload(nvfx_buffer(vb->buffer));
        }

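        /* index_bias is applied by offsetting the vertex buffer addresses by
         * base_vertex (see the VTXBUF setup in nvfx_vbo_validate), so whenever the
         * effective bias changes the arrays have to be re-emitted
         */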
        if (upload_mode > 1)
        {
            nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

            if (unlikely(info->index_bias != nvfx->base_vertex))
            {
                nvfx->base_vertex = info->index_bias;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
        else
        {
            if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
            {
                nvfx->base_vertex = 0;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
    }

    if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
        nvfx_draw_vbo_swtnl(pipe, info);
    else
        nvfx_push_vbo(pipe, info);
}

boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    int i;
    int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
    unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;

    if (!elements)
        return TRUE;

    MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
    for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
    {
        struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        float v[4];
        ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
        nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];

            if(idx != ve->idx)
            {
                assert(idx < ve->idx);
                OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
                idx = ve->idx;
            }

            OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV30_3D_VTXFMT_STRIDE__SHIFT));
            ++idx;
        }
        if(idx != nvfx->vtxelt->num_elements)
            OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
    }
    else
        OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);

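    /* elements can be larger than num_elements when the previously bound element state
     * used more vertex elements: the extra slots are reset to a dummy float format here
     * and to a null VTXBUF address below, so no stale state is left behind
     */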
    for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

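    /* each VTXBUF entry is emitted as a relocation: NOUVEAU_BO_LOW has the low 32 bits
     * of the buffer address (plus the offset we pass) patched in at submission time,
     * and the NOUVEAU_BO_OR pair (0, NV30_3D_VTXBUF_DMA1) selects DMA object 1 when the
     * BO ends up in GART rather than VRAM -- at least that is how the old
     * libdrm_nouveau reloc interface appears to be used here
     */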
    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
            struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

            for(; idx < ve->idx; ++idx)
                OUT_RING(chan, 0);

            OUT_RELOC(chan, bo,
                    vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                    vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                    0, NV30_3D_VTXBUF_DMA1);
            ++idx;
        }

        for(; idx < elements; ++idx)
            OUT_RING(chan, 0);
    }
    else
    {
        for (i = 0; i < elements; i++)
            OUT_RING(chan, 0);
    }

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
    nvfx->relocs_needed &= ~NVFX_RELOCATE_VTXBUF;
    return TRUE;
}

void
nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned num_outputs = nvfx->vertprog->draw_elements;
    int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);

    if (!elements)
        return;

    WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    for(unsigned i = 0; i < num_outputs; ++i)
        OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
    for(unsigned i = num_outputs; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    for (unsigned i = 0; i < elements; i++)
        OUT_RING(chan, 0);

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = num_outputs;
    nvfx->relocs_needed &= ~NVFX_RELOCATE_VTXBUF;
}

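/* apparently meant for the case where the pushbuf has been flushed while
 * NVFX_RELOCATE_VTXBUF was still set: the vertex buffer methods are re-emitted as
 * relocations (with NOUVEAU_BO_DUMMY) so the possibly moved buffer addresses are valid
 * again in the new pushbuf
 */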
void
nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan;
    unsigned vb_flags;
    int i;

    if(!nvfx->use_vertex_buffers)
        return;

    chan = nvfx->screen->base.channel;
    vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;

    MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
    for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
        struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

        OUT_RELOC(chan, bo, RING_3D(NV30_3D_VTXBUF(ve->idx), 1),
                vb_flags, 0, 0);
        OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                0, NV30_3D_VTXBUF_DMA1);
    }
    nvfx->relocs_needed &= ~NVFX_RELOCATE_VTXBUF;
}

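/* emits the index buffer address and format; the same helper serves both the normal
 * validate path (ib_flags == 0) and the post-flush relocate path, which passes
 * NOUVEAU_BO_DUMMY and therefore also needs the method header emitted as a relocation
 */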
static void
nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV30_3D_IDXBUF_FORMAT_TYPE_U16 : NV30_3D_IDXBUF_FORMAT_TYPE_U32;
    struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
    ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;

    assert(nvfx->screen->index_buffer_reloc_flags);

    MARK_RING(chan, 3, 3);
    if(ib_flags & NOUVEAU_BO_DUMMY)
        OUT_RELOC(chan, bo, RING_3D(NV30_3D_IDXBUF_OFFSET, 2), ib_flags, 0, 0);
    else
        OUT_RING(chan, RING_3D(NV30_3D_IDXBUF_OFFSET, 2));
    OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
    OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
            0, NV30_3D_IDXBUF_FORMAT_DMA1);
    nvfx->relocs_needed &= ~NVFX_RELOCATE_IDXBUF;
}

void
nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, 0);
}

void
nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
}

unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
{
    [PIPE_FORMAT_R32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_USCALED] = NV30_3D_VTXFMT_TYPE_U8_USCALED,
    [PIPE_FORMAT_R16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16A16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
};
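/* vertex formats not listed above stay 0 in this table; nvfx_vtxelts_state_create
 * below treats a 0 entry as "not directly supported by the hardware" and falls back to
 * converting such attributes to floats with the translate module
 */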
static void *
nvfx_vtxelts_state_create(struct pipe_context *pipe,
                          unsigned num_elements,
                          const struct pipe_vertex_element *elements)
{
    struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
    struct translate_key transkey;
    unsigned per_vertex_size[16];
    unsigned vb_compacted_index[16];

    if(num_elements > 16)
    {
        _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements);
        num_elements = 16;
    }

    memset(per_vertex_size, 0, sizeof(per_vertex_size));
    memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
    cso->num_elements = num_elements;
    cso->needs_translate = FALSE;

    transkey.nr_elements = 0;
    transkey.output_stride = 0;

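    /* first pass: accumulate, per vertex buffer, the combined size of all attributes
     * fetched per-vertex from it; second pass: build the list of vertex buffers that
     * are actually used per-vertex and record in vb_compacted_index which compacted
     * slot each original vertex buffer index maps to
     */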
    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        if(!ve->instance_divisor)
            per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
    }

    for(unsigned i = 0; i < 16; ++i)
    {
        if(per_vertex_size[i])
        {
            unsigned idx = cso->num_per_vertex_buffer_infos++;
            cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
            cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
            vb_compacted_index[i] = idx;
        }
    }

    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        unsigned type = nvfx_vertex_formats[ve->src_format];
        unsigned ncomp = util_format_get_nr_components(ve->src_format);

        //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
        if(ve->instance_divisor)
        {
            struct nvfx_low_frequency_element* lfve;
            cso->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT;

            //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
            if(0)
                lfve = &cso->constant[cso->num_constant++];
            else
            {
                lfve = &cso->per_instance[cso->num_per_instance++].base;
                ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
            }

            lfve->idx = i;
            lfve->vertex_buffer_index = ve->vertex_buffer_index;
            lfve->src_offset = ve->src_offset;
            lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
            lfve->ncomp = ncomp;
        }
        else
        {
            unsigned idx;

            idx = cso->num_per_vertex++;
            cso->per_vertex[idx].idx = i;
            cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
            cso->per_vertex[idx].src_offset = ve->src_offset;

            idx = transkey.nr_elements++;
            transkey.element[idx].input_format = ve->src_format;
            transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
            transkey.element[idx].input_offset = ve->src_offset;
            transkey.element[idx].instance_divisor = 0;
            transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
            if(type)
            {
                transkey.element[idx].output_format = ve->src_format;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | type;
            }
            else
            {
                unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
                transkey.element[idx].output_format = float32[ncomp - 1];
                cso->needs_translate = TRUE;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT;
            }
            transkey.element[idx].output_offset = transkey.output_stride;
            transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
        }
    }

    cso->translate = translate_create(&transkey);
    cso->vertex_length = transkey.output_stride >> 2;
    cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1);
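    /* presumably because 2047 is the largest method count a single FIFO packet header
     * can encode, and vertex_length is in 32-bit words: this is how many whole
     * vertices fit into one inline vertex data packet
     */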

    return (void *)cso;
}

static void
nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
    FREE(hwcso);
}

static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    nvfx->vtxelt = hwcso;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
                        const struct pipe_vertex_buffer *vb)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    for(unsigned i = 0; i < count; ++i)
    {
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
        nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
        nvfx->vtxbuf[i].max_index = vb[i].max_index;
        nvfx->vtxbuf[i].stride = vb[i].stride;
    }

    for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);

    nvfx->vtxbuf_nr = count;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_index_buffer(struct pipe_context *pipe,
                      const struct pipe_index_buffer *ib)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    if(ib)
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
        nvfx->idxbuf.index_size = ib->index_size;
        nvfx->idxbuf.offset = ib->offset;
    }
    else
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
        nvfx->idxbuf.index_size = 0;
        nvfx->idxbuf.offset = 0;
    }

    nvfx->dirty |= NVFX_NEW_INDEX;
    nvfx->draw_dirty |= NVFX_NEW_INDEX;
}

void
nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
    nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
    nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;

    nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
    nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
    nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}