nouveau: fix includes for latest libdrm
src/gallium/drivers/nvfx/nvfx_vbo.c
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

#include "nouveau/nouveau_channel.h"
#include "nouveau/nv04_pushbuf.h"

static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
    /* Euler's formula gives V =
     * = E - F + 2 =
     * = F * (polygon_edges / 2 - 1) + 2 =
     * = F * (polygon_edges - 2) / 2 + 2 =
     * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
     * = indices * (1 / 2 - 1 / polygon_edges) + 2
     */
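    /* For example, a closed cube mesh drawn as 12 triangles uses 36 indices:
     * 36 / 6 + 2 = 8, which matches the cube's 8 unique vertices.
     */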
    switch(mode)
    {
    case PIPE_PRIM_LINES:
        return indices >> 1;
    case PIPE_PRIM_TRIANGLES:
    {
        // avoid an expensive division by 3 using the multiplicative inverse mod 2^32
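        // 3 * 2863311531 = 8589934593 = 2 * 2^32 + 1, so multiplying by inv3 is an
        // exact division by 3 when the operand is a multiple of 3; the fix-up
        // additions below handle the other cases, which wrap around to huge values.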
        unsigned q;
        unsigned inv3 = 2863311531;
        indices >>= 1;
        q = indices * inv3;
        if(unlikely(q >= indices))
        {
            q += inv3;
            if(q >= indices)
                q += inv3;
        }
        return q + 2;
        //return indices / 6 + 2;
    }
    // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
    case PIPE_PRIM_QUADS:
        return (indices >> 1) + 2;
    // return (indices >> 2) + 2; // if it is a closed mesh
    default:
        return indices;
    }
}

static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context* nvfx = nvfx_context(pipe);
    unsigned hardware_cost = 0;
    unsigned inline_cost = 0;
    unsigned unique_vertices;
    unsigned upload_mode;
    float best_index_cost_for_hardware_vertices_as_inline_cost;
    boolean prefer_hardware_indices;
    unsigned index_inline_cost;
    unsigned index_hardware_cost;
    if (info->indexed)
        unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
    else
        unique_vertices = info->count;

    /* Here we try to figure out whether we are better off writing vertex data directly
     * to the FIFO, or creating hardware buffer objects and pointing the hardware at them.
     *
     * This is done by computing the total memcpy cost of each option, ignoring uploads
     * if we think that the buffer is static and the upload cost will thus be amortized
     * over future draw calls.
     *
     * For instance, if everything looks static, we always create buffer objects, while if
     * everything is a user buffer and we are not doing indexed drawing, we never do.
     *
     * Other interesting cases are a small user vertex buffer paired with a huge user index
     * buffer, where we upload the vertex buffer so that we can use hardware index lookup,
     * and the opposite case, where we instead do index lookup in software to avoid
     * uploading a huge amount of vertex data that is not going to be used.
     *
     * Otherwise, a buffer is generally moved to the GPU after it has been pushed
     * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times without having been updated with a
     * transfer (or destroyed) in between.
     *
     * There is no special handling for user buffers, since applications can use
     * OpenGL VBOs in a one-shot fashion; the OpenGL 3/4 core profiles in fact
     * force this.
     *
     * Note that we currently don't support putting only some data on the FIFO and
     * the rest in vertex buffers (constant and instanced data are independent of this).
     *
     * nVidia doesn't seem to do this either, even though it should at least be
     * doable with VTX_ATTR, and possibly with VERTEX_DATA too when not indexed.
     */

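    /* Accumulate, for every vertex buffer referenced by the current vertex elements,
     * both the cost of uploading its dirty range to a hardware buffer object (plus an
     * allocation cost if it has no BO yet) and the cost of pushing its per-vertex data
     * inline for this draw.
     */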
    for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
    {
        struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
        if (!nvfx_buffer_seems_static(buffer))
        {
            hardware_cost += buffer->dirty_end - buffer->dirty_begin;
            if (!buffer->base.bo)
                hardware_cost += nvfx->screen->buffer_allocation_cost;
        }
        inline_cost += vbi->per_vertex_size * info->count;
    }

    best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
    prefer_hardware_indices = FALSE;
    index_inline_cost = 0;
    index_hardware_cost = 0;

    if (info->indexed)
    {
        index_inline_cost = nvfx->idxbuf.index_size * info->count;
        if (nvfx->screen->index_buffer_reloc_flags
                && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
                && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
        {
            struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
            buffer->bytes_to_draw_until_static -= index_inline_cost;

            prefer_hardware_indices = TRUE;

            if (!nvfx_buffer_seems_static(buffer))
            {
                index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
                if (!buffer->base.bo)
                    index_hardware_cost += nvfx->screen->buffer_allocation_cost;
            }

            if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
            }
            else
            {
                best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
                prefer_hardware_indices = TRUE;
            }
        }
    }

    /* let's finally figure out which of the 3 paths we want to take */
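    /* 0 = push vertex data (and indices) inline on the FIFO,
     * 1 = hardware vertex buffers with inline indices,
     * 2 = hardware vertex and index buffers
     */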
    if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
        upload_mode = 1 + prefer_hardware_indices;
    else
        upload_mode = 0;

#ifdef DEBUG
    if (unlikely(nvfx->screen->trace_draw))
    {
        fprintf(stderr, "DRAW");
        if (info->indexed)
        {
            fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
            if (info->index_bias)
                fprintf(stderr, " biased %u", info->index_bias);
            fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
        }
        if (info->instance_count > 1)
            fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
        fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
        if (!upload_mode)
            fprintf(stderr, " -> inline vertex data");
        else if (upload_mode == 2 || !info->indexed)
            fprintf(stderr, " -> buffer range");
        else
            fprintf(stderr, " -> inline indices");
        fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
            if (i)
                fprintf(stderr, ", ");
            fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
        }
        fprintf(stderr, ">\n");
    }
#endif

    return upload_mode;
}

void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);
    unsigned upload_mode = 0;

    if (!nvfx->vtxelt->needs_translate)
        upload_mode = nvfx_decide_upload_mode(pipe, info);

    nvfx->use_index_buffer = upload_mode > 1;

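    /* Switching between the inline and hardware vertex paths changes how the vertex
     * arrays are programmed, so flag them dirty for both the hardware and the draw module.
     */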
    if ((upload_mode > 0) != nvfx->use_vertex_buffers)
    {
        nvfx->use_vertex_buffers = (upload_mode > 0);
        nvfx->dirty |= NVFX_NEW_ARRAYS;
        nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
    }

    if (upload_mode > 0)
    {
        for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
        {
            struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
            nvfx_buffer_upload(nvfx_buffer(vb->buffer));
        }

        if (upload_mode > 1)
        {
            nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

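            /* The index bias is applied by offsetting the vertex buffer addresses by
             * base_vertex (see nvfx_vbo_validate), so a change forces the arrays to be
             * re-emitted.
             */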
            if (unlikely(info->index_bias != nvfx->base_vertex))
            {
                nvfx->base_vertex = info->index_bias;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
        else
        {
            if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
            {
                nvfx->base_vertex = 0;
                nvfx->dirty |= NVFX_NEW_ARRAYS;
            }
        }
    }

    if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
        nvfx_draw_vbo_swtnl(pipe, info);
    else
        nvfx_push_vbo(pipe, info);
}

boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    int i;
    int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
    unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;

    if (!elements)
        return TRUE;

    MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
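    /* Constant (zero-frequency) vertex elements are fetched on the CPU and emitted as
     * immediate vertex attributes instead of being sourced from a vertex buffer.
     */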
    for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
    {
        struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
        float v[4];
        ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
        nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];

            if(idx != ve->idx)
            {
                assert(idx < ve->idx);
                OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
                idx = ve->idx;
            }

            OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV30_3D_VTXFMT_STRIDE__SHIFT));
            ++idx;
        }
        if(idx != nvfx->vtxelt->num_elements)
            OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
    }
    else
        OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);

    for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    if(nvfx->use_vertex_buffers)
    {
        unsigned idx = 0;
        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
            struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
            struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
            struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

            for(; idx < ve->idx; ++idx)
                OUT_RING(chan, 0);

            OUT_RELOC(chan, bo,
                    vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                    vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                    0, NV30_3D_VTXBUF_DMA1);
            ++idx;
        }

        for(; idx < elements; ++idx)
            OUT_RING(chan, 0);
    }
    else
    {
        for (i = 0; i < elements; i++)
            OUT_RING(chan, 0);
    }

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
    nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
    return TRUE;
}

void
nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned num_outputs = nvfx->vertprog->draw_elements;
    int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);

    if (!elements)
        return;

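    /* In the swtnl path the draw module feeds us post-transformed vertices, so every
     * vertex program output is emitted as a generic 4-component float attribute.
     */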
    WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);

    OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
    for(unsigned i = 0; i < num_outputs; ++i)
        OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
    for(unsigned i = num_outputs; i < elements; ++i)
        OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT);

    if(nvfx->is_nv4x) {
        unsigned i;
        /* seems to be some kind of cache flushing */
        for(i = 0; i < 3; ++i) {
            OUT_RING(chan, RING_3D(0x1718, 1));
            OUT_RING(chan, 0);
        }
    }

    OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
    for (unsigned i = 0; i < elements; i++)
        OUT_RING(chan, 0);

    OUT_RING(chan, RING_3D(0x1710, 1));
    OUT_RING(chan, 0);

    nvfx->hw_vtxelt_nr = num_outputs;
    nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
}

void
nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
    struct nouveau_channel* chan;
    unsigned vb_flags;
    int i;

    if(!nvfx->use_vertex_buffers)
        return;

    chan = nvfx->screen->base.channel;
    vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
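    /* Only relocation entries are being refreshed here; NOUVEAU_BO_DUMMY presumably marks
     * them as applying to command words already present in the pushbuf rather than new data.
     */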

    MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
    for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
        struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
        struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
        struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;

        OUT_RELOC(chan, bo, RING_3D(NV30_3D_VTXBUF(ve->idx), 1),
                vb_flags, 0, 0);
        OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
                vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                0, NV30_3D_VTXBUF_DMA1);
    }
    nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
}

static void
nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
    struct nouveau_channel* chan = nvfx->screen->base.channel;
    unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV30_3D_IDXBUF_FORMAT_TYPE_U16 : NV30_3D_IDXBUF_FORMAT_TYPE_U32;
    struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
    ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;

    assert(nvfx->screen->index_buffer_reloc_flags);

    MARK_RING(chan, 3, 3);
    if(ib_flags & NOUVEAU_BO_DUMMY)
        OUT_RELOC(chan, bo, RING_3D(NV30_3D_IDXBUF_OFFSET, 2), ib_flags, 0, 0);
    else
        OUT_RING(chan, RING_3D(NV30_3D_IDXBUF_OFFSET, 2));
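    /* The offset word gets the BO's address added via NOUVEAU_BO_LOW; the format word gets
     * the DMA object selector OR'd in, presumably 0 for VRAM placement and
     * NV30_3D_IDXBUF_FORMAT_DMA1 for GART placement.
     */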
    OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
    OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
            0, NV30_3D_IDXBUF_FORMAT_DMA1);
    nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF;
}

void
nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, 0);
}

void
nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
{
    nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
}

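/* Vertex formats the hardware can fetch natively; formats left zero here are expanded to
 * 32-bit floats with the translate module (see nvfx_vtxelts_state_create below).
 */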
unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
{
    [PIPE_FORMAT_R32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT,
    [PIPE_FORMAT_R16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT,
    [PIPE_FORMAT_R8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM,
    [PIPE_FORMAT_R8G8B8A8_USCALED] = NV30_3D_VTXFMT_TYPE_U8_USCALED,
    [PIPE_FORMAT_R16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16G16B16A16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM,
    [PIPE_FORMAT_R16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
    [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED,
};

static void *
nvfx_vtxelts_state_create(struct pipe_context *pipe,
                          unsigned num_elements,
                          const struct pipe_vertex_element *elements)
{
    struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
    struct translate_key transkey;
    unsigned per_vertex_size[16];
    unsigned vb_compacted_index[16];

    if(num_elements > 16)
    {
        _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements);
        num_elements = 16;
    }

    memset(per_vertex_size, 0, sizeof(per_vertex_size));
    memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
    cso->num_elements = num_elements;
    cso->needs_translate = FALSE;

    transkey.nr_elements = 0;
    transkey.output_stride = 0;

    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        if(!ve->instance_divisor)
            per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
    }

    for(unsigned i = 0; i < 16; ++i)
    {
        if(per_vertex_size[i])
        {
            unsigned idx = cso->num_per_vertex_buffer_infos++;
            cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
            cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
            vb_compacted_index[i] = idx;
        }
    }

    for(unsigned i = 0; i < num_elements; ++i)
    {
        const struct pipe_vertex_element* ve = &elements[i];
        unsigned type = nvfx_vertex_formats[ve->src_format];
        unsigned ncomp = util_format_get_nr_components(ve->src_format);

        //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
        if(ve->instance_divisor)
        {
            struct nvfx_low_frequency_element* lfve;
            cso->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT;

            //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
            if(0)
                lfve = &cso->constant[cso->num_constant++];
            else
            {
                lfve = &cso->per_instance[cso->num_per_instance++].base;
                ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
            }

            lfve->idx = i;
            lfve->vertex_buffer_index = ve->vertex_buffer_index;
            lfve->src_offset = ve->src_offset;
            lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
            lfve->ncomp = ncomp;
        }
        else
        {
            unsigned idx;

            idx = cso->num_per_vertex++;
            cso->per_vertex[idx].idx = i;
            cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
            cso->per_vertex[idx].src_offset = ve->src_offset;

            idx = transkey.nr_elements++;
            transkey.element[idx].input_format = ve->src_format;
            transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
            transkey.element[idx].input_offset = ve->src_offset;
            transkey.element[idx].instance_divisor = 0;
            transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
            if(type)
            {
                transkey.element[idx].output_format = ve->src_format;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | type;
            }
            else
            {
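                /* No native hardware encoding for this format: have the translate module
                 * expand it to 32-bit floats at draw time.
                 */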
                unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
                transkey.element[idx].output_format = float32[ncomp - 1];
                cso->needs_translate = TRUE;
                cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT;
            }
            transkey.element[idx].output_offset = transkey.output_stride;
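            /* Translated outputs are packed consecutively, each padded to a 4-byte boundary. */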
            transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
        }
    }

    cso->translate = translate_create(&transkey);
    cso->vertex_length = transkey.output_stride >> 2;
    cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1);

    return (void *)cso;
}

static void
nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
    FREE(hwcso);
}

static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    nvfx->vtxelt = hwcso;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
                        const struct pipe_vertex_buffer *vb)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    for(unsigned i = 0; i < count; ++i)
    {
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
        nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
        nvfx->vtxbuf[i].max_index = vb[i].max_index;
        nvfx->vtxbuf[i].stride = vb[i].stride;
    }

    for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
        pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);

    nvfx->vtxbuf_nr = count;
    nvfx->use_vertex_buffers = -1;
    nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

static void
nvfx_set_index_buffer(struct pipe_context *pipe,
                      const struct pipe_index_buffer *ib)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);

    if(ib)
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
        nvfx->idxbuf.index_size = ib->index_size;
        nvfx->idxbuf.offset = ib->offset;
    }
    else
    {
        pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
        nvfx->idxbuf.index_size = 0;
        nvfx->idxbuf.offset = 0;
    }

    nvfx->dirty |= NVFX_NEW_INDEX;
    nvfx->draw_dirty |= NVFX_NEW_INDEX;
}

void
nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
    nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
    nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;

    nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
    nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
    nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}