[mesa.git] src/gallium/drivers/nvfx/nvfx_vbo.c
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

#include "nouveau/nouveau_channel.h"

#include "nouveau/nouveau_pushbuf.h"

static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
    /* Euler's formula gives V =
     * = E - F + 2 =
     * = F * (polygon_edges / 2 - 1) + 2 =
     * = F * (polygon_edges - 2) / 2 + 2 =
     * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
     * = indices * (1 / 2 - 1 / polygon_edges) + 2
     */
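    /* Sanity check of the estimate on a closed cube drawn as indexed triangles
     * (illustrative): 12 triangles give 36 indices, and
     * 36 * (1/2 - 1/3) + 2 = 6 + 2 = 8, matching the cube's 8 unique vertices.
     */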
    switch(mode)
    {
    case PIPE_PRIM_LINES:
        return indices >> 1;
    case PIPE_PRIM_TRIANGLES:
    {
        // avoid an expensive division by 3 using the multiplicative inverse mod 2^32
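        // (2863311531 is 0xAAAAAAAB, and 3 * 2863311531 = 2 * 2^32 + 1, i.e. 1 mod 2^32,
        // so multiplying by it undoes a multiplication by 3 whenever the value is a
        // multiple of 3; for the other residues the product wraps around to a huge
        // value, which the q >= indices checks below detect and correct)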
        unsigned q;
        unsigned inv3 = 2863311531;
        indices >>= 1;
        q = indices * inv3;
        if(unlikely(q >= indices))
        {
            q += inv3;
            if(q >= indices)
                q += inv3;
        }
        return q + 2;
        //return indices / 6 + 2;
    }
    // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
    case PIPE_PRIM_QUADS:
        return (indices >> 1) + 2;
        // return (indices >> 2) + 2; // if it is a closed mesh
    default:
        return indices;
    }
}

static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context* nvfx = nvfx_context(pipe);
    unsigned hardware_cost = 0;
    unsigned inline_cost = 0;
    unsigned unique_vertices;
    unsigned upload_mode;
    float best_index_cost_for_hardware_vertices_as_inline_cost;
    boolean prefer_hardware_indices;
    unsigned index_inline_cost;
    unsigned index_hardware_cost;

    if (info->indexed)
        unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
    else
        unique_vertices = info->count;

    /* Here we try to figure out whether we are better off writing vertex data directly
     * to the FIFO, or creating hardware buffer objects and pointing the hardware at them.
     *
     * This is done by computing the total memcpy cost of each option, ignoring uploads
     * if we think that the buffer is static and thus the upload cost will be amortized
     * over future draw calls.
     *
     * For instance, if everything looks static, we will always create buffer objects,
     * while if everything is a user buffer and we are not doing indexed drawing, we
     * never do.
     *
     * Another interesting case is a small user vertex buffer combined with a huge user
     * index buffer: there we upload the vertex buffer so that we can use hardware index
     * lookup. In the opposite case we instead do index lookup in software, to avoid
     * uploading a huge amount of vertex data that is not going to be used.
     *
     * Otherwise, we generally move a buffer to the GPU after it has been pushed
     * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having been updated
     * with a transfer (or destroyed) in the meantime.
     *
     * There is no special handling for user buffers, since applications can use OpenGL
     * VBOs in a one-shot fashion anyway; the OpenGL 3/4 core profiles in fact force the
     * use of VBOs.
     *
     * Note that currently we don't support putting only some of the data on the FIFO
     * and the rest in vertex buffers (constant and instanced data is independent of
     * this).
     *
     * nVidia doesn't seem to do this either, even though it should be at least doable
     * with VTX_ATTR, and possibly with VERTEX_DATA too if not indexed.
     */

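    /* In short: upload_mode 0 means inline everything, 1 means hardware vertex buffers
     * with inline indices, and 2 means hardware vertex and index buffers. The decision
     * below roughly compares
     *   inline_cost + index_inline_cost
     * against
     *   hardware_cost * inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost.
     *
     * The loop below charges, for each vertex buffer referenced per-vertex, either the
     * inline copy of the vertices used by this draw or the upload of its dirty range
     * (plus an allocation cost if it has no BO yet), and also decrements
     * bytes_to_draw_until_static, which is presumably what nvfx_buffer_seems_static
     * looks at to decide when a buffer counts as static.
     */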
    for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
    {
        struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
        if (!nvfx_buffer_seems_static(buffer))
        {
            hardware_cost += buffer->dirty_end - buffer->dirty_begin;
            if (!buffer->base.bo)
                hardware_cost += nvfx->screen->buffer_allocation_cost;
        }
        inline_cost += vbi->per_vertex_size * info->count;
    }

    best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
    prefer_hardware_indices = FALSE;
    index_inline_cost = 0;
    index_hardware_cost = 0;

    if (info->indexed)
    {
        index_inline_cost = nvfx->idxbuf.index_size * info->count;
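        /* hardware index buffers apparently need 16- or 32-bit indices and an offset
         * aligned to the index size, and index_buffer_reloc_flags being zero means the
         * screen cannot use them at all; otherwise we must stay with inline indices
         */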
        if (nvfx->screen->index_buffer_reloc_flags
                && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
                && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
        {
            struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
            buffer->bytes_to_draw_until_static -= index_inline_cost;

            prefer_hardware_indices = TRUE;

            if (!nvfx_buffer_seems_static(buffer))
            {
                index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
                if (!buffer->base.bo)
                    index_hardware_cost += nvfx->screen->buffer_allocation_cost;
            }

            if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
            }
            else
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
                prefer_hardware_indices = TRUE;
            }
        }
    }

    /* let's finally figure out which of the 3 paths we want to take */
    if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
        upload_mode = 1 + prefer_hardware_indices;
    else
        upload_mode = 0;

#ifdef DEBUG
    if (unlikely(nvfx->screen->trace_draw))
    {
        fprintf(stderr, "DRAW");
        if (info->indexed)
        {
            fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
            if (info->index_bias)
                fprintf(stderr, " biased %u", info->index_bias);
            fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
        }
        if (info->instance_count > 1)
            fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
        fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
        if (!upload_mode)
            fprintf(stderr, " -> inline vertex data");
        else if (upload_mode == 2 || !info->indexed)
            fprintf(stderr, " -> buffer range");
        else
            fprintf(stderr, " -> inline indices");
        fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
            if (i)
                fprintf(stderr, ", ");
            fprintf(stderr, "%p%s left %lli", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
        }
        fprintf(stderr, ">\n");
    }
#endif

    return upload_mode;
}

void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);
    unsigned upload_mode = 0;

    if (!nvfx->vtxelt->needs_translate)
        upload_mode = nvfx_decide_upload_mode(pipe, info);

    nvfx->use_index_buffer = upload_mode > 1;

    if ((upload_mode > 0) != nvfx->use_vertex_buffers)
    {
        nvfx->use_vertex_buffers = (upload_mode > 0);
        nvfx->dirty |= NVFX_NEW_ARRAYS;
        nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
    }

    if (upload_mode > 0)
    {
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            nvfx_buffer_upload(nvfx_buffer(vb->buffer));
        }

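        /* index_bias is applied by offsetting the vertex buffer addresses by
         * base_vertex (see the VTXBUF setup in nvfx_vbo_validate), so whenever the
         * effective bias changes the arrays have to be re-emitted
         */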
        if (upload_mode > 1)
        {
            nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

            if (unlikely(info->index_bias != nvfx->base_vertex))
            {
                nvfx->base_vertex = info->index_bias;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
        else
        {
            if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
            {
                nvfx->base_vertex = 0;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
    }

    if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
        nvfx_draw_vbo_swtnl(pipe, info);
    else
        nvfx_push_vbo(pipe, info);
}

boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    int i;
    int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
    unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;

    if (!elements)
        return TRUE;

    MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
    for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
    {
        struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        float v[4];
        ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
        nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];

            if(idx != ve->idx)
            {
                assert(idx < ve->idx);
                OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
                idx = ve->idx;
            }

            OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV30_3D_VTXFMT_STRIDE__SHIFT));
            ++idx;
        }
        if(idx != nvfx->vtxelt->num_elements)
            OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
    }
    else
        OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);

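    /* elements can be larger than num_elements when the previously bound element state
     * used more vertex elements: the extra slots are reset to a dummy float format here
     * and to a null VTXBUF address below, so no stale state is left behind
     */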
    for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

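    /* each VTXBUF entry is emitted as a relocation: NOUVEAU_BO_LOW has the low 32 bits
     * of the buffer address (plus the offset we pass) patched in at submission time,
     * and the NOUVEAU_BO_OR pair (0, NV30_3D_VTXBUF_DMA1) selects DMA object 1 when the
     * BO ends up in GART rather than VRAM -- at least that is how the old
     * libdrm_nouveau reloc interface appears to be used here
     */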
    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
            struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

            for(; idx < ve->idx; ++idx)
                OUT_RING(chan, 0);

            OUT_RELOC(chan, bo,
                    vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                    vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                    0, NV30_3D_VTXBUF_DMA1);
            ++idx;
        }

        for(; idx < elements; ++idx)
            OUT_RING(chan, 0);
    }
    else
    {
        for (i = 0; i < elements; i++)
            OUT_RING(chan, 0);
    }

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
    nvfx->relocs_needed &= ~NVFX_RELOCATE_VTXBUF;
    return TRUE;
}

void
nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned num_outputs = nvfx->vertprog->draw_elements;
    int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);

    if (!elements)
        return;

    WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    for(unsigned i = 0; i < num_outputs; ++i)
        OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
    for(unsigned i = num_outputs; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    for (unsigned i = 0; i < elements; i++)
        OUT_RING(chan, 0);

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = num_outputs;
    nvfx->relocs_needed &= ~NVFX_RELOCATE_VTXBUF;
}

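/* apparently meant for the case where the pushbuf has been flushed while
 * NVFX_RELOCATE_VTXBUF was still set: the vertex buffer methods are re-emitted as
 * relocations (with NOUVEAU_BO_DUMMY) so the possibly moved buffer addresses are valid
 * again in the new pushbuf
 */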
void
nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan;
    unsigned vb_flags;
    int i;

    if(!nvfx->use_vertex_buffers)
        return;

    chan = nvfx->screen->base.channel;
    vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;

    MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
    for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
        struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

        OUT_RELOC(chan, bo, RING_3D(NV30_3D_VTXBUF(ve->idx), 1),
                vb_flags, 0, 0);
        OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                0, NV30_3D_VTXBUF_DMA1);
    }
    nvfx->relocs_needed &= ~NVFX_RELOCATE_VTXBUF;
}

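/* emits the index buffer address and format; the same helper serves both the normal
 * validate path (ib_flags == 0) and the post-flush relocate path, which passes
 * NOUVEAU_BO_DUMMY and therefore also needs the method header emitted as a relocation
 */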
static void
nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV30_3D_IDXBUF_FORMAT_TYPE_U16 : NV30_3D_IDXBUF_FORMAT_TYPE_U32;
    struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
    ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;

    assert(nvfx->screen->index_buffer_reloc_flags);

    MARK_RING(chan, 3, 3);
    if(ib_flags & NOUVEAU_BO_DUMMY)
        OUT_RELOC(chan, bo, RING_3D(NV30_3D_IDXBUF_OFFSET, 2), ib_flags, 0, 0);
    else
        OUT_RING(chan, RING_3D(NV30_3D_IDXBUF_OFFSET, 2));
    OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
    OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
            0, NV30_3D_IDXBUF_FORMAT_DMA1);
    nvfx->relocs_needed &= ~NVFX_RELOCATE_IDXBUF;
}

void
nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, 0);
}

void
nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
}

unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
{
    [PIPE_FORMAT_R32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_USCALED] = NV30_3D_VTXFMT_TYPE_U8_USCALED,
    [PIPE_FORMAT_R16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16A16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
};
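/* vertex formats not listed above stay 0 in this table; nvfx_vtxelts_state_create
 * below treats a 0 entry as "not directly supported by the hardware" and falls back to
 * converting such attributes to floats with the translate module
 */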
static void *
nvfx_vtxelts_state_create(struct pipe_context *pipe,
                          unsigned num_elements,
                          const struct pipe_vertex_element *elements)
{
    struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
    struct translate_key transkey;
    unsigned per_vertex_size[16];
    unsigned vb_compacted_index[16];

    if(num_elements > 16)
    {
        _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements);
        num_elements = 16;
    }

    memset(per_vertex_size, 0, sizeof(per_vertex_size));
    memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
    cso->num_elements = num_elements;
    cso->needs_translate = FALSE;

    transkey.nr_elements = 0;
    transkey.output_stride = 0;

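    /* first pass: accumulate, per vertex buffer, the combined size of all attributes
     * fetched per-vertex from it; second pass: build the list of vertex buffers that
     * are actually used per-vertex and record in vb_compacted_index which compacted
     * slot each original vertex buffer index maps to
     */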
    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        if(!ve->instance_divisor)
            per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
    }

    for(unsigned i = 0; i < 16; ++i)
    {
        if(per_vertex_size[i])
        {
            unsigned idx = cso->num_per_vertex_buffer_infos++;
            cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
            cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
            vb_compacted_index[i] = idx;
        }
    }

    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        unsigned type = nvfx_vertex_formats[ve->src_format];
        unsigned ncomp = util_format_get_nr_components(ve->src_format);

        //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
        if(ve->instance_divisor)
        {
            struct nvfx_low_frequency_element* lfve;
            cso->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT;

            //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
            if(0)
                lfve = &cso->constant[cso->num_constant++];
            else
            {
                lfve = &cso->per_instance[cso->num_per_instance++].base;
                ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
            }

            lfve->idx = i;
            lfve->vertex_buffer_index = ve->vertex_buffer_index;
            lfve->src_offset = ve->src_offset;
            lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
            lfve->ncomp = ncomp;
        }
        else
        {
            unsigned idx;

            idx = cso->num_per_vertex++;
            cso->per_vertex[idx].idx = i;
            cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
            cso->per_vertex[idx].src_offset = ve->src_offset;

            idx = transkey.nr_elements++;
            transkey.element[idx].input_format = ve->src_format;
            transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
            transkey.element[idx].input_offset = ve->src_offset;
            transkey.element[idx].instance_divisor = 0;
            transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
            if(type)
            {
                transkey.element[idx].output_format = ve->src_format;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | type;
            }
            else
            {
                unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
                transkey.element[idx].output_format = float32[ncomp - 1];
                cso->needs_translate = TRUE;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT;
            }
            transkey.element[idx].output_offset = transkey.output_stride;
            transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
        }
    }

    cso->translate = translate_create(&transkey);
    cso->vertex_length = transkey.output_stride >> 2;
    cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1);
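    /* presumably because 2047 is the largest method count a single FIFO packet header
     * can encode, and vertex_length is in 32-bit words: this is how many whole
     * vertices fit into one inline vertex data packet
     */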

    return (void *)cso;
}

static void
nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
    FREE(hwcso);
}

static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    nvfx->vtxelt = hwcso;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
                        const struct pipe_vertex_buffer *vb)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    for(unsigned i = 0; i < count; ++i)
    {
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
        nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
        nvfx->vtxbuf[i].max_index = vb[i].max_index;
        nvfx->vtxbuf[i].stride = vb[i].stride;
    }

    for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);

    nvfx->vtxbuf_nr = count;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_index_buffer(struct pipe_context *pipe,
                      const struct pipe_index_buffer *ib)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    if(ib)
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
        nvfx->idxbuf.index_size = ib->index_size;
        nvfx->idxbuf.offset = ib->offset;
    }
    else
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
        nvfx->idxbuf.index_size = 0;
        nvfx->idxbuf.offset = 0;
    }

    nvfx->dirty |= NVFX_NEW_INDEX;
    nvfx->draw_dirty |= NVFX_NEW_INDEX;
}

void
nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
    nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
    nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;

    nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
    nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
    nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}