nvfx: rewrite draw code and buffer code
src/gallium/drivers/nvfx/nvfx_vbo.c
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_class.h"
#include "nouveau/nouveau_pushbuf.h"

static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
	/* Euler's formula gives V =
	 *	= E - F + 2 =
	 *	= F * (polygon_edges / 2 - 1) + 2 =
	 *	= F * (polygon_edges - 2) / 2 + 2 =
	 *	= indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
	 *	= indices * (1 / 2 - 1 / polygon_edges) + 2
	 */
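	/* Worked example (illustrative only): a closed triangle mesh drawn with
	 * 36 indices (a cube as 12 triangles) is estimated to have
	 * 36 * (1/2 - 1/3) + 2 = 8 unique vertices, which happens to be exact.
	 */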
	switch(mode)
	{
	case PIPE_PRIM_LINES:
		return indices >> 1;
	case PIPE_PRIM_TRIANGLES:
	{
		// avoid an expensive division by 3 using the multiplicative inverse mod 2^32
		unsigned q;
		unsigned inv3 = 2863311531;
		indices >>= 1;
		q = indices * inv3;
		if(unlikely(q >= indices))
		{
			q += inv3;
			if(q >= indices)
				q += inv3;
		}
		return q + 2;
		//return indices / 6 + 2;
	}
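	/* Note on the trick above: 3 * 2863311531 == 1 (mod 2^32), so for values
	 * divisible by 3 the multiplication yields the exact quotient; the two
	 * conditional additions round the result up when the value is not
	 * divisible by 3.
	 */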
	// guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
	case PIPE_PRIM_QUADS:
		return (indices >> 1) + 2;
		// return (indices >> 2) + 2; // if it is a closed mesh
	default:
		return indices;
	}
}

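/* Decide how the draw should be submitted. Returns 0 to push inline vertex
 * data through the FIFO, 1 to use hardware vertex buffers with inline (or no)
 * indices, and 2 to use hardware vertex and index buffers.
 * (Summary inferred from how upload_mode is consumed below.)
 */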
static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
	struct nvfx_context* nvfx = nvfx_context(pipe);
	unsigned hardware_cost = 0;
	unsigned inline_cost = 0;
	unsigned unique_vertices;
	unsigned upload_mode;
	if (info->indexed)
		unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
	else
		unique_vertices = info->count;

	/* Here we try to figure out if we are better off writing vertex data directly to the FIFO,
	 * or creating hardware buffer objects and pointing the hardware to them.
	 *
	 * This is done by computing the total memcpy cost of each option, ignoring uploads
	 * if we think that the buffer is static and thus the upload cost will be amortized over
	 * future draw calls.
	 *
	 * For instance, if everything looks static, we will always create buffer objects, while if
	 * everything is a user buffer and we are not doing indexed drawing, we never do.
	 *
	 * Another interesting case is a small user vertex buffer combined with a huge user index
	 * buffer: there we upload the vertex buffer so that we can use hardware index lookup. In
	 * the opposite case we instead do index lookup in software, to avoid uploading a huge
	 * amount of vertex data that is not going to be used.
	 *
	 * Otherwise, we generally move a buffer to the GPU after it has been pushed
	 * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the FIFO without having
	 * been updated with a transfer (or just the buffer having been destroyed).
	 *
	 * There is no special handling for user buffers, since applications can use
	 * OpenGL VBOs in a one-shot fashion too. The OpenGL 3/4 core profiles force
	 * the use of VBOs anyway.
	 *
	 * Note that currently we don't support putting only some data on the FIFO and
	 * the rest in vertex buffers (constant and instanced data is independent from this).
	 *
	 * nVidia doesn't seem to do this either, even though it should be at least
	 * doable with VTX_ATTR, and possibly with VERTEX_DATA too if not indexed.
	 */
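	/* Illustrative example (made-up numbers, assuming inline_cost_per_hardware_cost = 1.0):
	 * a non-indexed draw that would copy 64 KB of vertex data inline, reading from a
	 * buffer whose dirty range is 256 KB, stays on the inline path (64 KB < 256 KB);
	 * once that buffer is considered static its hardware cost is treated as already
	 * amortized, and the buffer-object path wins.
	 */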

	for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
	{
		struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
		struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
		buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
		if (!nvfx_buffer_seems_static(buffer))
		{
			hardware_cost += buffer->dirty_end - buffer->dirty_begin;
			if (!buffer->base.bo)
				hardware_cost += nvfx->screen->buffer_allocation_cost;
		}
		inline_cost += vbi->per_vertex_size * info->count;
	}

	float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
	boolean prefer_hardware_indices = FALSE;
	unsigned index_inline_cost = 0;
	unsigned index_hardware_cost = 0;

	if (info->indexed)
	{
		index_inline_cost = nvfx->idxbuf.index_size * info->count;
		if (nvfx->screen->index_buffer_reloc_flags
			&& (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
			&& !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
		{
			struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
			buffer->bytes_to_draw_until_static -= index_inline_cost;

			prefer_hardware_indices = TRUE;

			if (!nvfx_buffer_seems_static(buffer))
			{
				index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
				if (!buffer->base.bo)
					index_hardware_cost += nvfx->screen->buffer_allocation_cost;
			}

			if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
			{
				best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
				prefer_hardware_indices = FALSE;
			}
			else
			{
				best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
				prefer_hardware_indices = TRUE;
			}
		}
	}

	/* let's finally figure out which of the 3 paths we want to take */
	if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
		upload_mode = 1 + prefer_hardware_indices;
	else
		upload_mode = 0;

#ifdef DEBUG
	if (unlikely(nvfx->screen->trace_draw))
	{
		fprintf(stderr, "DRAW");
		if (info->indexed)
		{
			fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
			if (info->index_bias)
				fprintf(stderr, " biased %u", info->index_bias);
			fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
		}
		if (info->instance_count > 1)
			fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
		fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
		if (!upload_mode)
			fprintf(stderr, " -> inline vertex data");
		else if (upload_mode == 2 || !info->indexed)
			fprintf(stderr, " -> buffer range");
		else
			fprintf(stderr, " -> inline indices");
		fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
		for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
		{
			struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
			struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
			if (i)
				fprintf(stderr, ", ");
			fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
		}
		fprintf(stderr, ">\n");
	}
#endif

	return upload_mode;
}

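/* Top-level draw entry point: pick an upload mode (unless the vertex layout
 * needs translation), upload whatever buffers the chosen hardware path needs,
 * then either validate state and push the draw or fall back to the software
 * TNL path. (Descriptive summary of the code below.)
 */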
void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	unsigned upload_mode = 0;

	if (!nvfx->vtxelt->needs_translate)
		upload_mode = nvfx_decide_upload_mode(pipe, info);

	nvfx->use_index_buffer = upload_mode > 1;

	if ((upload_mode > 0) != nvfx->use_vertex_buffers)
	{
		nvfx->use_vertex_buffers = (upload_mode > 0);
		nvfx->dirty |= NVFX_NEW_ARRAYS;
		nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
	}

	if (upload_mode > 0)
	{
		for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
		{
			struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
			nvfx_buffer_upload(nvfx_buffer(vb->buffer));
		}

		if (upload_mode > 1)
		{
			nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

			if (unlikely(info->index_bias != nvfx->base_vertex))
			{
				nvfx->base_vertex = info->index_bias;
				nvfx->dirty |= NVFX_NEW_ARRAYS;
			}
		}
		else
		{
			if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
			{
				nvfx->base_vertex = 0;
				nvfx->dirty |= NVFX_NEW_ARRAYS;
			}
		}
	}

	if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
		nvfx_draw_vbo_swtnl(pipe, info);
	else
		nvfx_push_vbo(pipe, info);
}

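/* Emit the vertex element state: constant (zero-frequency) attributes are
 * pushed via nvfx_emit_vtx_attr(), per-vertex attributes get a VTXFMT entry
 * and, when hardware vertex buffers are in use, a relocated VTXBUF_ADDRESS
 * entry. (Descriptive summary of the code below.)
 */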
boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
	struct nouveau_channel* chan = nvfx->screen->base.channel;
	int i;
	int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
	unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;

	if (!elements)
		return TRUE;

	MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
	for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
	{
		struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
		struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
		float v[4];
		ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
		nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
	}


	OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
	if(nvfx->use_vertex_buffers)
	{
		unsigned idx = 0;
		for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
			struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];

			if(idx != ve->idx)
			{
				assert(idx < ve->idx);
				OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
				idx = ve->idx;
			}

			OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT));
			++idx;
		}
		if(idx != nvfx->vtxelt->num_elements)
			OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
	}
	else
		OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);

	for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
		OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);

	if(nvfx->is_nv4x) {
		unsigned i;
		/* seems to be some kind of cache flushing */
		for(i = 0; i < 3; ++i) {
			OUT_RING(chan, RING_3D(0x1718, 1));
			OUT_RING(chan, 0);
		}
	}

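	/* When hardware vertex buffers are in use, each per-vertex element gets a
	 * vertex buffer address entry: a relocation against the backing BO at
	 * buffer_offset + src_offset, biased by base_vertex * stride. All other
	 * slots (and the whole range in the inline-data case) are written as 0.
	 */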
	OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
	if(nvfx->use_vertex_buffers)
	{
		unsigned idx = 0;
		for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
			struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
			struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

			for(; idx < ve->idx; ++idx)
				OUT_RING(chan, 0);

			OUT_RELOC(chan, bo,
				vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
				vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
				0, NV34TCL_VTXBUF_ADDRESS_DMA1);
			++idx;
		}

		for(; idx < elements; ++idx)
			OUT_RING(chan, 0);
	}
	else
	{
		for (i = 0; i < elements; i++)
			OUT_RING(chan, 0);
	}

	OUT_RING(chan, RING_3D(0x1710, 1));
	OUT_RING(chan, 0);

	nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
	return TRUE;
}

void
nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
	if(!nvfx->use_vertex_buffers)
		return;

	struct nouveau_channel* chan = nvfx->screen->base.channel;
	unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
	int i;

	MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
	for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
		struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
		struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

		OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1),
			vb_flags, 0, 0);
		OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
			vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
			0, NV34TCL_VTXBUF_ADDRESS_DMA1);
	}
}

static void
nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
	struct nouveau_channel* chan = nvfx->screen->base.channel;
	unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32;
	struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
	ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;

	assert(nvfx->screen->index_buffer_reloc_flags);

	MARK_RING(chan, 3, 3);
	if(ib_flags & NOUVEAU_BO_DUMMY)
		OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0);
	else
		OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
	OUT_RELOC(chan, bo, nvfx->idxbuf.offset, ib_flags | NOUVEAU_BO_LOW, 0, 0);
	OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
		0, NV34TCL_IDXBUF_FORMAT_DMA1);
}

void
nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
	nvfx_idxbuf_emit(nvfx, 0);
}

void
nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
{
	nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
}

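/* Map pipe vertex formats to NV34TCL vertex fetch types. Formats not listed
 * here map to 0, which makes nvfx_vtxelts_state_create() fall back to
 * translating them to 32-bit floats.
 */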
unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
{
	[PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
	[PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
	[PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
	[PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
	[PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
	[PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
	[PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
	[PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
	[PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
	[PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
	[PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
	[PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
	[PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED,
	[PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
	[PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
	[PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
	[PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
	[PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
	[PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
	[PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
	[PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
};

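/* Build the vertex element CSO: split elements into per-vertex, per-instance
 * and constant groups, precompute the hardware VTXFMT words, and set up a
 * translate key so that formats the hardware cannot fetch directly can be
 * converted to 32-bit floats. (Descriptive summary of the code below.)
 */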
static void *
nvfx_vtxelts_state_create(struct pipe_context *pipe,
			  unsigned num_elements,
			  const struct pipe_vertex_element *elements)
{
	struct nvfx_context* nvfx = nvfx_context(pipe);
	struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
	struct translate_key transkey;
	unsigned per_vertex_size[16];
	memset(per_vertex_size, 0, sizeof(per_vertex_size));

	unsigned vb_compacted_index[16];

	assert(num_elements < 16); /* not doing fallbacks yet */

	memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
	cso->num_elements = num_elements;
	cso->needs_translate = FALSE;

	transkey.nr_elements = 0;
	transkey.output_stride = 0;

	for(unsigned i = 0; i < num_elements; ++i)
	{
		const struct pipe_vertex_element* ve = &elements[i];
		if(!ve->instance_divisor)
			per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
	}

	for(unsigned i = 0; i < 16; ++i)
	{
		if(per_vertex_size[i])
		{
			unsigned idx = cso->num_per_vertex_buffer_infos++;
			cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
			cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
			vb_compacted_index[i] = idx;
		}
	}

	for(unsigned i = 0; i < num_elements; ++i)
	{
		const struct pipe_vertex_element* ve = &elements[i];
		unsigned type = nvfx_vertex_formats[ve->src_format];
		unsigned ncomp = util_format_get_nr_components(ve->src_format);

		//if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
		if(ve->instance_divisor)
		{
			struct nvfx_low_frequency_element* lfve;
			cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT;

			//if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
			if(0)
				lfve = &cso->constant[cso->num_constant++];
			else
			{
				lfve = &cso->per_instance[cso->num_per_instance++].base;
				((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
			}

			lfve->idx = i;
			lfve->vertex_buffer_index = ve->vertex_buffer_index;
			lfve->src_offset = ve->src_offset;
			lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
			lfve->ncomp = ncomp;
		}
		else
		{
			unsigned idx;

			idx = cso->num_per_vertex++;
			cso->per_vertex[idx].idx = i;
			cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
			cso->per_vertex[idx].src_offset = ve->src_offset;

			idx = transkey.nr_elements++;
			transkey.element[idx].input_format = ve->src_format;
			transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
			transkey.element[idx].input_offset = ve->src_offset;
			transkey.element[idx].instance_divisor = 0;
			transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
			if(type)
			{
				transkey.element[idx].output_format = ve->src_format;
				cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type;
			}
			else
			{
				unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
				transkey.element[idx].output_format = float32[ncomp - 1];
				cso->needs_translate = TRUE;
				cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT;
			}
			transkey.element[idx].output_offset = transkey.output_stride;
			transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
		}
	}

	cso->translate = translate_generic_create(&transkey);
	cso->vertex_length = transkey.output_stride >> 2;
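	/* The divisor below, 2047, is assumed to be the largest method count a
	 * single FIFO packet header can carry, which bounds how many translated
	 * vertices fit in one inline vertex data packet.
	 */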
	cso->max_vertices_per_packet = 2047 / cso->vertex_length;

	return (void *)cso;
}

static void
nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
	FREE(hwcso);
}

static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	nvfx->vtxelt = hwcso;
	nvfx->use_vertex_buffers = -1;
	nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
			const struct pipe_vertex_buffer *vb)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	for(unsigned i = 0; i < count; ++i)
	{
		pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
		nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
		nvfx->vtxbuf[i].max_index = vb[i].max_index;
		nvfx->vtxbuf[i].stride = vb[i].stride;
	}

	for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
		pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);

	nvfx->vtxbuf_nr = count;
	nvfx->use_vertex_buffers = -1;
	nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_index_buffer(struct pipe_context *pipe,
		      const struct pipe_index_buffer *ib)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	if(ib)
	{
		pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
		nvfx->idxbuf.index_size = ib->index_size;
		nvfx->idxbuf.offset = ib->offset;
	}
	else
	{
		pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
		nvfx->idxbuf.index_size = 0;
		nvfx->idxbuf.offset = 0;
	}

	nvfx->dirty |= NVFX_NEW_INDEX;
	nvfx->draw_dirty |= NVFX_NEW_INDEX;
}

void
nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
	nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
	nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;

	nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
	nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
	nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}