nouveau: fix includes for latest libdrm
src/gallium/drivers/nvfx/nvfx_vbo.c
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

#include "nouveau/nouveau_channel.h"
#include "nouveau/nv04_pushbuf.h"

static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
    /* Euler's formula gives V =
     * = E - F + 2 =
     * = F * (polygon_edges / 2 - 1) + 2 =
     * = F * (polygon_edges - 2) / 2 + 2 =
     * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
     * = indices * (1 / 2 - 1 / polygon_edges) + 2
     */
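    /* For example, a closed cube mesh drawn as 12 triangles uses 36 indices:
     * 36 / 6 + 2 = 8, which matches the cube's 8 unique vertices.
     */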
    switch(mode)
    {
    case PIPE_PRIM_LINES:
        return indices >> 1;
    case PIPE_PRIM_TRIANGLES:
    {
        // avoid an expensive division by 3 using the multiplicative inverse mod 2^32
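        // 3 * 2863311531 = 8589934593 = 2 * 2^32 + 1, so multiplying by inv3 is an
        // exact division by 3 when the operand is a multiple of 3; the fix-up
        // additions below handle the other cases, which wrap around to huge values.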
        unsigned q;
        unsigned inv3 = 2863311531;
        indices >>= 1;
        q = indices * inv3;
        if(unlikely(q >= indices))
        {
            q += inv3;
            if(q >= indices)
                q += inv3;
        }
        return q + 2;
        //return indices / 6 + 2;
    }
    // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
    case PIPE_PRIM_QUADS:
        return (indices >> 1) + 2;
    // return (indices >> 2) + 2; // if it is a closed mesh
    default:
        return indices;
    }
}

static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context* nvfx = nvfx_context(pipe);
    unsigned hardware_cost = 0;
    unsigned inline_cost = 0;
    unsigned unique_vertices;
    unsigned upload_mode;
    float best_index_cost_for_hardware_vertices_as_inline_cost;
    boolean prefer_hardware_indices;
    unsigned index_inline_cost;
    unsigned index_hardware_cost;
    if (info->indexed)
        unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
    else
        unique_vertices = info->count;

    /* Here we try to figure out whether we are better off writing vertex data directly
     * to the FIFO, or creating hardware buffer objects and pointing the hardware at them.
     *
     * This is done by computing the total memcpy cost of each option, ignoring uploads
     * if we think that the buffer is static and the upload cost will thus be amortized
     * over future draw calls.
     *
     * For instance, if everything looks static, we always create buffer objects, while if
     * everything is a user buffer and we are not doing indexed drawing, we never do.
     *
     * Other interesting cases are a small user vertex buffer paired with a huge user index
     * buffer, where we upload the vertex buffer so that we can use hardware index lookup,
     * and the opposite case, where we instead do index lookup in software to avoid
     * uploading a huge amount of vertex data that is not going to be used.
     *
     * Otherwise, a buffer is generally moved to the GPU after it has been pushed
     * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times without having been updated with a
     * transfer (or destroyed) in between.
     *
     * There is no special handling for user buffers, since applications can use
     * OpenGL VBOs in a one-shot fashion; the OpenGL 3/4 core profiles in fact
     * force this.
     *
     * Note that we currently don't support putting only some data on the FIFO and
     * the rest in vertex buffers (constant and instanced data are independent of this).
     *
     * nVidia doesn't seem to do this either, even though it should at least be
     * doable with VTX_ATTR, and possibly with VERTEX_DATA too when not indexed.
     */

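    /* Accumulate, for every vertex buffer referenced by the current vertex elements,
     * both the cost of uploading its dirty range to a hardware buffer object (plus an
     * allocation cost if it has no BO yet) and the cost of pushing its per-vertex data
     * inline for this draw.
     */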
    for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
    {
        struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
        if (!nvfx_buffer_seems_static(buffer))
        {
            hardware_cost += buffer->dirty_end - buffer->dirty_begin;
            if (!buffer->base.bo)
                hardware_cost += nvfx->screen->buffer_allocation_cost;
        }
        inline_cost += vbi->per_vertex_size * info->count;
    }

    best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
    prefer_hardware_indices = FALSE;
    index_inline_cost = 0;
    index_hardware_cost = 0;

    if (info->indexed)
    {
        index_inline_cost = nvfx->idxbuf.index_size * info->count;
        if (nvfx->screen->index_buffer_reloc_flags
                && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
                && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
        {
            struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
            buffer->bytes_to_draw_until_static -= index_inline_cost;

            prefer_hardware_indices = TRUE;

            if (!nvfx_buffer_seems_static(buffer))
            {
                index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
                if (!buffer->base.bo)
                    index_hardware_cost += nvfx->screen->buffer_allocation_cost;
            }

            if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
            }
            else
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
                prefer_hardware_indices = TRUE;
            }
        }
    }

    /* let's finally figure out which of the 3 paths we want to take */
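    /* 0 = push vertex data (and indices) inline on the FIFO,
     * 1 = hardware vertex buffers with inline indices,
     * 2 = hardware vertex and index buffers
     */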
    if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
        upload_mode = 1 + prefer_hardware_indices;
    else
        upload_mode = 0;

#ifdef DEBUG
    if (unlikely(nvfx->screen->trace_draw))
    {
        fprintf(stderr, "DRAW");
        if (info->indexed)
        {
            fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
            if (info->index_bias)
                fprintf(stderr, " biased %u", info->index_bias);
            fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
        }
        if (info->instance_count > 1)
            fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
        fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
        if (!upload_mode)
            fprintf(stderr, " -> inline vertex data");
        else if (upload_mode == 2 || !info->indexed)
            fprintf(stderr, " -> buffer range");
        else
            fprintf(stderr, " -> inline indices");
        fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
            if (i)
                fprintf(stderr, ", ");
            fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
        }
        fprintf(stderr, ">\n");
    }
#endif

    return upload_mode;
}

void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);
    unsigned upload_mode = 0;

    if (!nvfx->vtxelt->needs_translate)
        upload_mode = nvfx_decide_upload_mode(pipe, info);

    nvfx->use_index_buffer = upload_mode > 1;

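    /* Switching between the inline and hardware vertex paths changes how the vertex
     * arrays are programmed, so flag them dirty for both the hardware and the draw module.
     */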
    if ((upload_mode > 0) != nvfx->use_vertex_buffers)
    {
        nvfx->use_vertex_buffers = (upload_mode > 0);
        nvfx->dirty |= NVFX_NEW_ARRAYS;
        nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
    }

    if (upload_mode > 0)
    {
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            nvfx_buffer_upload(nvfx_buffer(vb->buffer));
        }

        if (upload_mode > 1)
        {
            nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

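            /* The index bias is applied by offsetting the vertex buffer addresses by
             * base_vertex (see nvfx_vbo_validate), so a change forces the arrays to be
             * re-emitted.
             */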
            if (unlikely(info->index_bias != nvfx->base_vertex))
            {
                nvfx->base_vertex = info->index_bias;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
        else
        {
            if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
            {
                nvfx->base_vertex = 0;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
    }

    if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
        nvfx_draw_vbo_swtnl(pipe, info);
    else
        nvfx_push_vbo(pipe, info);
}

boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    int i;
    int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
    unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;

    if (!elements)
        return TRUE;

    MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
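    /* Constant (zero-frequency) vertex elements are fetched on the CPU and emitted as
     * immediate vertex attributes instead of being sourced from a vertex buffer.
     */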
    for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
    {
        struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        float v[4];
        ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
        nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];

            if(idx != ve->idx)
            {
                assert(idx < ve->idx);
                OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
                idx = ve->idx;
            }

            OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV30_3D_VTXFMT_STRIDE__SHIFT));
            ++idx;
        }
        if(idx != nvfx->vtxelt->num_elements)
            OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
    }
    else
        OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);

    for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
            struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

            for(; idx < ve->idx; ++idx)
                OUT_RING(chan, 0);

            OUT_RELOC(chan, bo,
                    vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                    vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                    0, NV30_3D_VTXBUF_DMA1);
            ++idx;
        }

        for(; idx < elements; ++idx)
            OUT_RING(chan, 0);
    }
    else
    {
        for (i = 0; i < elements; i++)
            OUT_RING(chan, 0);
    }

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
    nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
    return TRUE;
}

void
nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned num_outputs = nvfx->vertprog->draw_elements;
    int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);

    if (!elements)
        return;

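    /* In the swtnl path the draw module feeds us post-transformed vertices, so every
     * vertex program output is emitted as a generic 4-component float attribute.
     */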
    WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    for(unsigned i = 0; i < num_outputs; ++i)
        OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
    for(unsigned i = num_outputs; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    for (unsigned i = 0; i < elements; i++)
        OUT_RING(chan, 0);

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = num_outputs;
    nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
}

void
nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan;
    unsigned vb_flags;
    int i;

    if(!nvfx->use_vertex_buffers)
        return;

    chan = nvfx->screen->base.channel;
    vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
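    /* Only relocation entries are being refreshed here; NOUVEAU_BO_DUMMY presumably marks
     * them as applying to command words already present in the pushbuf rather than new data.
     */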

    MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
    for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
        struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

        OUT_RELOC(chan, bo, RING_3D(NV30_3D_VTXBUF(ve->idx), 1),
                vb_flags, 0, 0);
        OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                0, NV30_3D_VTXBUF_DMA1);
    }
    nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
}

static void
nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV30_3D_IDXBUF_FORMAT_TYPE_U16 : NV30_3D_IDXBUF_FORMAT_TYPE_U32;
    struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
    ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;

    assert(nvfx->screen->index_buffer_reloc_flags);

    MARK_RING(chan, 3, 3);
    if(ib_flags & NOUVEAU_BO_DUMMY)
        OUT_RELOC(chan, bo, RING_3D(NV30_3D_IDXBUF_OFFSET, 2), ib_flags, 0, 0);
    else
        OUT_RING(chan, RING_3D(NV30_3D_IDXBUF_OFFSET, 2));
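    /* The offset word gets the BO's address added via NOUVEAU_BO_LOW; the format word gets
     * the DMA object selector OR'd in, presumably 0 for VRAM placement and
     * NV30_3D_IDXBUF_FORMAT_DMA1 for GART placement.
     */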
    OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
    OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
            0, NV30_3D_IDXBUF_FORMAT_DMA1);
    nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF;
}

void
nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, 0);
}

void
nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
}

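/* Vertex formats the hardware can fetch natively; formats left zero here are expanded to
 * 32-bit floats with the translate module (see nvfx_vtxelts_state_create below).
 */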
unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
{
    [PIPE_FORMAT_R32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_USCALED] = NV30_3D_VTXFMT_TYPE_U8_USCALED,
    [PIPE_FORMAT_R16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16A16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
};

static void *
nvfx_vtxelts_state_create(struct pipe_context *pipe,
                          unsigned num_elements,
                          const struct pipe_vertex_element *elements)
{
    struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
    struct translate_key transkey;
    unsigned per_vertex_size[16];
    unsigned vb_compacted_index[16];

    if(num_elements > 16)
    {
        _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements);
        num_elements = 16;
    }

    memset(per_vertex_size, 0, sizeof(per_vertex_size));
    memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
    cso->num_elements = num_elements;
    cso->needs_translate = FALSE;

    transkey.nr_elements = 0;
    transkey.output_stride = 0;

    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        if(!ve->instance_divisor)
            per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
    }

    for(unsigned i = 0; i < 16; ++i)
    {
        if(per_vertex_size[i])
        {
            unsigned idx = cso->num_per_vertex_buffer_infos++;
            cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
            cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
            vb_compacted_index[i] = idx;
        }
    }

    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        unsigned type = nvfx_vertex_formats[ve->src_format];
        unsigned ncomp = util_format_get_nr_components(ve->src_format);

        //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
        if(ve->instance_divisor)
        {
            struct nvfx_low_frequency_element* lfve;
            cso->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT;

            //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
            if(0)
                lfve = &cso->constant[cso->num_constant++];
            else
            {
                lfve = &cso->per_instance[cso->num_per_instance++].base;
                ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
            }

            lfve->idx = i;
            lfve->vertex_buffer_index = ve->vertex_buffer_index;
            lfve->src_offset = ve->src_offset;
            lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
            lfve->ncomp = ncomp;
        }
        else
        {
            unsigned idx;

            idx = cso->num_per_vertex++;
            cso->per_vertex[idx].idx = i;
            cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
            cso->per_vertex[idx].src_offset = ve->src_offset;

            idx = transkey.nr_elements++;
            transkey.element[idx].input_format = ve->src_format;
            transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
            transkey.element[idx].input_offset = ve->src_offset;
            transkey.element[idx].instance_divisor = 0;
            transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
            if(type)
            {
                transkey.element[idx].output_format = ve->src_format;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | type;
            }
            else
            {
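                /* No native hardware encoding for this format: have the translate module
                 * expand it to 32-bit floats at draw time.
                 */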
                unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
                transkey.element[idx].output_format = float32[ncomp - 1];
                cso->needs_translate = TRUE;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT;
            }
            transkey.element[idx].output_offset = transkey.output_stride;
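            /* Translated outputs are packed consecutively, each padded to a 4-byte boundary. */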
            transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
        }
    }

    cso->translate = translate_create(&transkey);
    cso->vertex_length = transkey.output_stride >> 2;
    cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1);

    return (void *)cso;
}

static void
nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
    FREE(hwcso);
}

static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    nvfx->vtxelt = hwcso;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
                        const struct pipe_vertex_buffer *vb)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    for(unsigned i = 0; i < count; ++i)
    {
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
        nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
        nvfx->vtxbuf[i].max_index = vb[i].max_index;
        nvfx->vtxbuf[i].stride = vb[i].stride;
    }

    for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);

    nvfx->vtxbuf_nr = count;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_index_buffer(struct pipe_context *pipe,
                      const struct pipe_index_buffer *ib)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    if(ib)
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
        nvfx->idxbuf.index_size = ib->index_size;
        nvfx->idxbuf.offset = ib->offset;
    }
    else
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
        nvfx->idxbuf.index_size = 0;
        nvfx->idxbuf.offset = 0;
    }

    nvfx->dirty |= NVFX_NEW_INDEX;
    nvfx->draw_dirty |= NVFX_NEW_INDEX;
}

void
nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
    nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
    nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;

    nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
    nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
    nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}