nv50: implement instanced drawing
[mesa.git] / src / gallium / drivers / nv50 / nv50_vbo.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "pipe/p_inlines.h"
26
27 #include "util/u_format.h"
28
29 #include "nv50_context.h"
30
/* Forward declarations for the FIFO push-back paths, used when vertex
 * data cannot be fetched by the hardware directly (see nv50->vbo_fifo).
 */
static boolean
nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);

static boolean
nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);

static boolean
nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);

static boolean
nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
42
43 static INLINE unsigned
44 nv50_prim(unsigned mode)
45 {
46 switch (mode) {
47 case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
48 case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
49 case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
50 case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
51 case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
52 case PIPE_PRIM_TRIANGLE_STRIP:
53 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
54 case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
55 case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
56 case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
57 case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
58 case PIPE_PRIM_LINES_ADJACENCY:
59 return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
60 case PIPE_PRIM_LINE_STRIP_ADJACENCY:
61 return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
62 case PIPE_PRIM_TRIANGLES_ADJACENCY:
63 return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
64 case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
65 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
66 default:
67 break;
68 }
69
70 NOUVEAU_ERR("invalid primitive type %d\n", mode);
71 return NV50TCL_VERTEX_BEGIN_POINTS;
72 }
73
74 static INLINE uint32_t
75 nv50_vbo_type_to_hw(enum pipe_format format)
76 {
77 const struct util_format_description *desc;
78
79 desc = util_format_description(format);
80 assert(desc);
81
82 switch (desc->channel[0].type) {
83 case UTIL_FORMAT_TYPE_FLOAT:
84 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
85 case UTIL_FORMAT_TYPE_UNSIGNED:
86 if (desc->channel[0].normalized) {
87 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
88 }
89 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
90 case UTIL_FORMAT_TYPE_SIGNED:
91 if (desc->channel[0].normalized) {
92 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
93 }
94 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
95 /*
96 case PIPE_FORMAT_TYPE_UINT:
97 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
98 case PIPE_FORMAT_TYPE_SINT:
99 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
100 default:
101 return 0;
102 }
103 }
104
105 static INLINE uint32_t
106 nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
107 {
108 static const uint32_t hw_values[] = {
109 0, 0, 0, 0,
110 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
111 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
112 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
113 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
114 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
115 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
116 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
117 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
118 0, 0, 0, 0,
119 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
120 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
121 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
122 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
123
124 /* we'd also have R11G11B10 and R10G10B10A2 */
125
126 assert(nr_c > 0 && nr_c <= 4);
127
128 if (size > 32)
129 return 0;
130 size >>= (3 - 2);
131
132 return hw_values[size + (nr_c - 1)];
133 }
134
/* Build the combined hardware type+size encoding for one vertex element.
 * Aborts on formats that neither the hardware fetch nor the defined
 * encodings can express.
 */
static INLINE uint32_t
nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
{
	uint32_t hw_type, hw_size;
	enum pipe_format pf = ve->src_format;
	const struct util_format_description *desc;
	unsigned size;

	desc = util_format_description(pf);
	assert(desc);

	/* bit width of channel 0; assumes all channels share it */
	size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);

	hw_type = nv50_vbo_type_to_hw(pf);
	hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);

	if (!hw_type || !hw_size) {
		NOUVEAU_ERR("unsupported vbo format: %s\n", pf_name(pf));
		abort();
		return 0x24e80000; /* never reached; placeholder encoding */
	}

	/* first channel sourced from Z means the format is BGRA-ordered */
	if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
		hw_size |= (1 << 31); /* no real swizzle bits :-( */

	return (hw_type | hw_size);
}
162
163 /* For instanced drawing from user buffers, hitting the FIFO repeatedly
164 * with the same vertex data is probably worse than uploading all data.
165 */
166 static boolean
167 nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
168 {
169 struct nv50_screen *nscreen = nv50->screen;
170 struct pipe_screen *pscreen = &nscreen->base.base;
171 struct pipe_buffer *buf = nscreen->strm_vbuf[i];
172 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
173 uint8_t *src;
174 unsigned size = MAX2(vb->buffer->size, 4096);
175
176 if (buf && buf->size < size)
177 pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
178
179 if (!nscreen->strm_vbuf[i]) {
180 nscreen->strm_vbuf[i] = pipe_buffer_create(
181 pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
182 buf = nscreen->strm_vbuf[i];
183 }
184
185 src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
186 if (!src)
187 return FALSE;
188 src += vb->buffer_offset;
189
190 size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
191 if (vb->buffer_offset + size > vb->buffer->size)
192 size = vb->buffer->size - vb->buffer_offset;
193
194 pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
195 pipe_buffer_unmap(pscreen, vb->buffer);
196
197 vb->buffer = buf; /* don't pipe_reference, this is a private copy */
198 return TRUE;
199 }
200
201 static void
202 nv50_upload_user_vbufs(struct nv50_context *nv50)
203 {
204 unsigned i;
205
206 if (nv50->vbo_fifo)
207 nv50->dirty |= NV50_NEW_ARRAYS;
208 if (!(nv50->dirty & NV50_NEW_ARRAYS))
209 return;
210
211 for (i = 0; i < nv50->vtxbuf_nr; ++i) {
212 if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
213 continue;
214 nv50_upload_vtxbuf(nv50, i);
215 }
216 }
217
218 static unsigned
219 init_per_instance_arrays(struct nv50_context *nv50,
220 unsigned startInstance,
221 unsigned pos[16], unsigned step[16])
222 {
223 struct nouveau_grobj *tesla = nv50->screen->tesla;
224 struct nouveau_channel *chan = tesla->channel;
225 struct nouveau_bo *bo;
226 struct nouveau_stateobj *so;
227 unsigned i, b, count = 0;
228 const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
229
230 so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
231
232 for (i = 0; i < nv50->vtxelt_nr; ++i) {
233 if (!nv50->vtxelt[i].instance_divisor)
234 continue;
235 ++count;
236 b = nv50->vtxelt[i].vertex_buffer_index;
237
238 pos[i] = nv50->vtxelt[i].src_offset +
239 nv50->vtxbuf[b].buffer_offset +
240 startInstance * nv50->vtxbuf[b].stride;
241
242 if (!startInstance) {
243 step[i] = 0;
244 continue;
245 }
246 step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
247
248 bo = nouveau_bo(nv50->vtxbuf[b].buffer);
249
250 so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
251 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
252 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
253 }
254
255 if (count) {
256 so_ref (so, &nv50->state.instbuf); /* for flush notify */
257 so_emit(chan, nv50->state.instbuf);
258 }
259 so_ref (NULL, &so);
260
261 return count;
262 }
263
264 static void
265 step_per_instance_arrays(struct nv50_context *nv50,
266 unsigned pos[16], unsigned step[16])
267 {
268 struct nouveau_grobj *tesla = nv50->screen->tesla;
269 struct nouveau_channel *chan = tesla->channel;
270 struct nouveau_bo *bo;
271 struct nouveau_stateobj *so;
272 unsigned i, b;
273 const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
274
275 so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
276
277 for (i = 0; i < nv50->vtxelt_nr; ++i) {
278 if (!nv50->vtxelt[i].instance_divisor)
279 continue;
280 b = nv50->vtxelt[i].vertex_buffer_index;
281
282 if (++step[i] == nv50->vtxelt[i].instance_divisor) {
283 step[i] = 0;
284 pos[i] += nv50->vtxbuf[b].stride;
285 }
286
287 bo = nouveau_bo(nv50->vtxbuf[b].buffer);
288
289 so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
290 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
291 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
292 }
293
294 so_ref (so, &nv50->state.instbuf); /* for flush notify */
295 so_ref (NULL, &so);
296
297 so_emit(chan, nv50->state.instbuf);
298 }
299
/* Draw instanceCount instances of the linear vertex range
 * [start, start+count).  The first instance uses a plain VERTEX_BEGIN;
 * later instances set bit 28 (advancing the hardware instance counter)
 * and re-step any per-instance arrays between draws.
 */
void
nv50_draw_arrays_instanced(struct pipe_context *pipe,
			   unsigned mode, unsigned start, unsigned count,
			   unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	unsigned i, nz_divisors;
	unsigned step[16], pos[16];

	nv50_upload_user_vbufs(nv50);

	nv50_state_validate(nv50);

	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);

	/* write startInstance into the aux constant buffer (offset encoded
	 * in bits 8+) -- presumably the instance id base for the vertex
	 * program; verify against the shader code */
	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING  (chan, startInstance);

	/* first instance: no bit 28 */
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING  (chan, nv50_prim(mode));
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
	OUT_RING  (chan, start);
	OUT_RING  (chan, count);
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING  (chan, 0);

	for (i = 1; i < instanceCount; i++) {
		if (nz_divisors) /* any non-zero array divisors ? */
			step_per_instance_arrays(nv50, pos, step);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING  (chan, start);
		OUT_RING  (chan, count);
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING  (chan, 0);
	}

	/* drop the per-instance array stateobj (kept for flush notify) */
	so_ref(NULL, &nv50->state.instbuf);
}
344
/* Non-instanced linear draw.  Uses hardware vertex fetch unless the
 * push path is required (nv50->vbo_fifo).
 */
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
		 unsigned count)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	boolean ret;

	nv50_state_validate(nv50);

	/* 0x142c written twice: undocumented method, presumably a
	 * serialize/flush before the draw -- TODO confirm semantics */
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING  (chan, 0);
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING  (chan, 0);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING  (chan, nv50_prim(mode));

	if (nv50->vbo_fifo)
		ret = nv50_push_arrays(nv50, start, count);
	else {
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING  (chan, start);
		OUT_RING  (chan, count);
		ret = TRUE;
	}
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING  (chan, 0);

	/* XXX: not sure what to do if ret != TRUE: flush and retry?
	 */
	assert(ret);
}
379
/* Emit 8-bit indices [start, start+count) inline into the command stream,
 * or hand off to the FIFO push path when hardware fetch is unavailable.
 */
static INLINE boolean
nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	if (nv50->vbo_fifo)
		return nv50_push_elements_u08(nv50, map, count);

	/* emit one index alone so the remainder packs into u16 pairs */
	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING  (chan, map[0]);
		map++;
		count--;
	}

	while (count) {
		/* 2046 indices -> 1023 dwords per packet */
		unsigned nr = count > 2046 ? 2046 : count;
		int i;

		/* 0x40000000: presumably the non-incrementing method flag */
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
		for (i = 0; i < nr; i += 2)
			OUT_RING (chan, (map[i + 1] << 16) | map[i]);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
412
/* Emit 16-bit indices [start, start+count) inline into the command stream,
 * or hand off to the FIFO push path when hardware fetch is unavailable.
 */
static INLINE boolean
nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	if (nv50->vbo_fifo)
		return nv50_push_elements_u16(nv50, map, count);

	/* emit one index alone so the remainder packs into u16 pairs */
	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING  (chan, map[0]);
		map++;
		count--;
	}

	while (count) {
		/* 2046 indices -> 1023 dwords per packet */
		unsigned nr = count > 2046 ? 2046 : count;
		int i;

		/* 0x40000000: presumably the non-incrementing method flag */
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
		for (i = 0; i < nr; i += 2)
			OUT_RING (chan, (map[i + 1] << 16) | map[i]);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
445
/* Emit 32-bit indices [start, start+count) inline, one per dword,
 * or hand off to the FIFO push path when hardware fetch is unavailable.
 */
static INLINE boolean
nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	if (nv50->vbo_fifo)
		return nv50_push_elements_u32(nv50, map, count);

	while (count) {
		/* 2047 is the maximum method data count */
		unsigned nr = count > 2047 ? 2047 : count;

		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
		OUT_RINGp (chan, map, nr);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
469
470 static INLINE void
471 nv50_draw_elements_inline(struct nv50_context *nv50,
472 void *map, unsigned indexSize,
473 unsigned start, unsigned count)
474 {
475 switch (indexSize) {
476 case 1:
477 nv50_draw_elements_inline_u08(nv50, map, start, count);
478 break;
479 case 2:
480 nv50_draw_elements_inline_u16(nv50, map, start, count);
481 break;
482 case 4:
483 nv50_draw_elements_inline_u32(nv50, map, start, count);
484 break;
485 }
486 }
487
488 void
489 nv50_draw_elements_instanced(struct pipe_context *pipe,
490 struct pipe_buffer *indexBuffer,
491 unsigned indexSize,
492 unsigned mode, unsigned start, unsigned count,
493 unsigned startInstance, unsigned instanceCount)
494 {
495 struct nv50_context *nv50 = nv50_context(pipe);
496 struct nouveau_grobj *tesla = nv50->screen->tesla;
497 struct nouveau_channel *chan = tesla->channel;
498 struct pipe_screen *pscreen = pipe->screen;
499 void *map;
500 unsigned i, nz_divisors;
501 unsigned step[16], pos[16];
502
503 map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
504
505 nv50_upload_user_vbufs(nv50);
506
507 nv50_state_validate(nv50);
508
509 nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
510
511 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
512 OUT_RING (chan, NV50_CB_AUX | (24 << 8));
513 OUT_RING (chan, startInstance);
514
515 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
516 OUT_RING (chan, nv50_prim(mode) | (1 << 28));
517
518 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
519
520 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
521 OUT_RING (chan, 0);
522
523 for (i = 1; i < instanceCount; ++i) {
524 if (nz_divisors) /* any non-zero array divisors ? */
525 step_per_instance_arrays(nv50, pos, step);
526
527 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
528 OUT_RING (chan, nv50_prim(mode) | (1 << 28));
529
530 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
531
532 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
533 OUT_RING (chan, 0);
534 }
535
536 so_ref(NULL, &nv50->state.instbuf);
537 }
538
/* Non-instanced indexed draw: map the index buffer on the CPU and emit
 * the indices inline between VERTEX_BEGIN/END.
 */
void
nv50_draw_elements(struct pipe_context *pipe,
		   struct pipe_buffer *indexBuffer, unsigned indexSize,
		   unsigned mode, unsigned start, unsigned count)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct pipe_screen *pscreen = pipe->screen;
	void *map;

	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);

	nv50_state_validate(nv50);

	/* 0x142c written twice: undocumented method, presumably a
	 * serialize/flush before the draw -- TODO confirm semantics */
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING  (chan, 0);
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING  (chan, 0);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING  (chan, nv50_prim(mode));

	nv50_draw_elements_inline(nv50, map, indexSize, start, count);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING  (chan, 0);

	pipe_buffer_unmap(pscreen, indexBuffer);
}
569
/* Turn a stride-0 (constant) vertex element into immediate VTX_ATTR state.
 * Reads the single value from the buffer, converts it to floats and emits
 * it into *pso (allocated on first use).  Returns FALSE if the buffer
 * cannot be mapped or the component count is unsupported.
 */
static INLINE boolean
nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
		       struct nouveau_stateobj **pso,
		       struct pipe_vertex_element *ve,
		       struct pipe_vertex_buffer *vb)

{
	struct nouveau_stateobj *so;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_bo *bo = nouveau_bo(vb->buffer);
	float v[4];
	int ret;

	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
	if (ret)
		return FALSE;

	/* read exactly one element, converted to float */
	util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
			    (vb->buffer_offset + ve->src_offset), 0,
			    0, 0, 1, 1);
	so = *pso;
	if (!so)
		*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);

	switch (ve->nr_components) {
	case 4:
		so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		so_data  (so, fui(v[3]));
		break;
	case 3:
		so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		break;
	case 2:
		so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		break;
	case 1:
		/* a constant edgeflag attribute also sets the fixed-function
		 * edgeflag state */
		if (attrib == nv50->vertprog->cfg.edgeflag_in) {
			so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
			so_data  (so, v[0] ? 1 : 0);
		}
		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
		so_data  (so, fui(v[0]));
		break;
	default:
		nouveau_bo_unmap(bo);
		return FALSE;
	}

	nouveau_bo_unmap(bo);
	return TRUE;
}
629
/* Validate vertex element/array state: build the stateobjs programming
 * attribute formats and array fetch addresses, decide which attributes
 * must instead be pushed through the FIFO (bitmask nv50->vbo_fifo), and
 * convert stride-0 attributes into constant VTX_ATTR values.
 */
void
nv50_vbo_validate(struct nv50_context *nv50)
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
	unsigned i, n_ve;

	/* don't validate if Gallium took away our buffers */
	if (nv50->vtxbuf_nr == 0)
		return;
	nv50->vbo_fifo = 0;

	/* user buffers with a stride can't be fetched by the hardware:
	 * force everything onto the push path */
	for (i = 0; i < nv50->vtxbuf_nr; ++i)
		if (nv50->vtxbuf[i].stride &&
		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
			nv50->vbo_fifo = 0xffff;

	if (nv50->vertprog->cfg.edgeflag_in < 16)
		nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */

	/* also cover arrays enabled by a previous validate that must now
	 * be disabled */
	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);

	vtxattr = NULL;
	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
	vtxfmt = so_new(1, n_ve, 0);
	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);

	for (i = 0; i < nv50->vtxelt_nr; i++) {
		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
		struct pipe_vertex_buffer *vb =
			&nv50->vtxbuf[ve->vertex_buffer_index];
		struct nouveau_bo *bo = nouveau_bo(vb->buffer);
		uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);

		/* stride 0: emit as constant attribute, disable the array */
		if (!vb->stride &&
		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
			so_data(vtxfmt, hw | (1 << 4));

			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);

			/* constant attribs never need the push path */
			nv50->vbo_fifo &= ~(1 << i);
			continue;
		}
		so_data(vtxfmt, hw | i);

		/* push path: disable hardware fetch for this array */
		if (nv50->vbo_fifo) {
			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);
			continue;
		}

		/* enable array i; per-instance arrays get stride 0 since
		 * their address is stepped manually between instances */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
		so_data  (vtxbuf, 0x20000000 |
			  (ve->instance_divisor ? 0 : vb->stride));
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);

		/* vertex array limits */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_LOW, 0, 0);
	}
	/* disable arrays left over from the previous configuration */
	for (; i < n_ve; ++i) {
		so_data (vtxfmt, 0x7e080010);

		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
		so_data  (vtxbuf, 0);
	}
	nv50->state.vtxelt_nr = nv50->vtxelt_nr;

	so_ref (vtxfmt, &nv50->state.vtxfmt);
	so_ref (vtxbuf, &nv50->state.vtxbuf);
	so_ref (vtxattr, &nv50->state.vtxattr);
	so_ref (NULL, &vtxbuf);
	so_ref (NULL, &vtxfmt);
	so_ref (NULL, &vtxattr);
}
718
/* Pushes one attribute's raw data for a single vertex onto the FIFO. */
typedef void (*pfn_push)(struct nouveau_channel *, void *);

/* Context for pushing vertex data through the FIFO (immediate mode). */
struct nv50_vbo_emitctx
{
	pfn_push push[16];   /* per-attribute emit callback */
	void *map[16];       /* per-attribute current source pointer */
	unsigned stride[16]; /* per-attribute source stride in bytes */
	unsigned nr_ve;      /* number of attributes to push */
	unsigned vtx_dwords; /* VERTEX_DATA dwords per vertex */
	unsigned vtx_max;    /* max vertices per VERTEX_DATA packet */

	float edgeflag;      /* last edgeflag sent; 0.5 = none sent yet */
	unsigned ve_edgeflag; /* edgeflag element index, or >= 16 if none */
};
733
734 static INLINE void
735 emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
736 {
737 unsigned i;
738
739 for (i = 0; i < emit->nr_ve; ++i) {
740 emit->push[i](chan, emit->map[i]);
741 emit->map[i] += emit->stride[i];
742 }
743 }
744
745 static INLINE void
746 emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
747 uint32_t vi)
748 {
749 unsigned i;
750
751 for (i = 0; i < emit->nr_ve; ++i)
752 emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
753 }
754
/* CPU-map every bound vertex buffer for the push path.  Returns FALSE
 * after unmapping on the first mapping failure.
 */
static INLINE boolean
nv50_map_vbufs(struct nv50_context *nv50)
{
	int i;

	for (i = 0; i < nv50->vtxbuf_nr; ++i) {
		struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
		unsigned size, delta;

		/* already mapped */
		if (nouveau_bo(vb->buffer)->map)
			continue;

		size = vb->stride * (vb->max_index + 1);
		delta = vb->buffer_offset;

		/* stride 0: map the remainder of the buffer */
		if (!size)
			size = vb->buffer->size - vb->buffer_offset;

		if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
					 delta, size, NOUVEAU_BO_RD))
			break;
	}

	if (i == nv50->vtxbuf_nr)
		return TRUE;
	/* NOTE(review): this rollback also unmaps buffers skipped above
	 * because they were already mapped on entry -- verify no caller
	 * relies on such mappings surviving a failure here */
	for (; i >= 0; --i)
		nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
	return FALSE;
}
784
785 static INLINE void
786 nv50_unmap_vbufs(struct nv50_context *nv50)
787 {
788 unsigned i;
789
790 for (i = 0; i < nv50->vtxbuf_nr; ++i)
791 if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
792 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
793 }
794
/* Push one raw 32-bit word. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
	OUT_RING(chan, *(uint32_t *)data);
}
802
/* Push two raw 32-bit words. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
	OUT_RINGp(chan, data, 2);
}
811
/* Push three raw 32-bit words. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
	OUT_RINGp(chan, data, 3);
}
821
/* Push four raw 32-bit words. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
	OUT_RINGp(chan, data, 4);
}
832
/* Push one 16-bit value in the low half of a dword. */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
	OUT_RING(chan, *(uint16_t *)data);
}
840
/* Push three 16-bit values: two packed into the first dword, the third
 * alone in the second.
 */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
	uint16_t *u = data;

	OUT_RING(chan, u[0] | ((uint32_t)u[1] << 16));
	OUT_RING(chan, u[2]);
}
849
/* Push one byte in the low bits of a dword. */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
	OUT_RING(chan, *(uint8_t *)data);
}
857
/* Push three bytes packed little-endian into one dword. */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
	uint8_t *u = data;

	OUT_RING(chan, u[0] | (u[1] << 8) | (u[2] << 16));
}
865
/* Prepare the push-path emit context: map the vertex buffers, select a
 * per-attribute emit callback based on channel size and count, and size
 * the VERTEX_DATA packets.  Returns FALSE on map failure or unsupported
 * channel size.
 */
static boolean
emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
	     unsigned start)
{
	unsigned i;

	if (nv50_map_vbufs(nv50) == FALSE)
		return FALSE;

	emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;

	/* 0.5 compares unequal to both 0.0 and 1.0, forcing the first
	 * edgeflag update */
	emit->edgeflag = 0.5f;
	emit->nr_ve = 0;
	emit->vtx_dwords = 0;

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		struct pipe_vertex_element *ve;
		struct pipe_vertex_buffer *vb;
		unsigned n, size;
		const struct util_format_description *desc;

		ve = &nv50->vtxelt[i];
		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
		/* only elements flagged for the push path */
		if (!(nv50->vbo_fifo & (1 << i)))
			continue;
		n = emit->nr_ve++;

		emit->stride[n] = vb->stride;
		emit->map[n] = nouveau_bo(vb->buffer)->map +
			(start * vb->stride + ve->src_offset);

		desc = util_format_description(ve->src_format);
		assert(desc);

		size = util_format_get_component_bits(
			ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);

		assert(ve->nr_components > 0 && ve->nr_components <= 4);

		/* It shouldn't be necessary to push the implicit 1s
		 * for case 3 and size 8 cases 1, 2, 3.
		 */
		switch (size) {
		default:
			NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
			return FALSE;
		case 32:
			switch (ve->nr_components) {
			case 1: emit->push[n] = emit_b32_1; break;
			case 2: emit->push[n] = emit_b32_2; break;
			case 3: emit->push[n] = emit_b32_3; break;
			case 4: emit->push[n] = emit_b32_4; break;
			}
			emit->vtx_dwords += ve->nr_components;
			break;
		case 16:
			/* pairs of 16-bit values fill whole dwords, hence
			 * the b32 emitters for even counts */
			switch (ve->nr_components) {
			case 1: emit->push[n] = emit_b16_1; break;
			case 2: emit->push[n] = emit_b32_1; break;
			case 3: emit->push[n] = emit_b16_3; break;
			case 4: emit->push[n] = emit_b32_2; break;
			}
			emit->vtx_dwords += (ve->nr_components + 1) >> 1;
			break;
		case 8:
			switch (ve->nr_components) {
			case 1: emit->push[n] = emit_b08_1; break;
			case 2: emit->push[n] = emit_b16_1; break;
			case 3: emit->push[n] = emit_b08_3; break;
			case 4: emit->push[n] = emit_b32_1; break;
			}
			emit->vtx_dwords += 1;
			break;
		}
	}

	emit->vtx_max = 512 / emit->vtx_dwords;
	/* with an edgeflag attribute, emit one vertex per packet so the
	 * flag can change between vertices */
	if (emit->ve_edgeflag < 16)
		emit->vtx_max = 1;

	return TRUE;
}
948
/* If the vertex program consumes an edgeflag attribute, read its value
 * for the given vertex and emit a state change when it differs from the
 * value last sent.
 * NOTE(review): emit->map[] is indexed by compacted push slot while
 * ve_edgeflag is a vertex element index; these coincide because an
 * edgeflag forces ALL elements onto the push path (vbo_fifo = 0xffff in
 * nv50_vbo_validate) -- verify if that invariant ever changes.
 */
static INLINE void
set_edgeflag(struct nouveau_channel *chan,
	     struct nouveau_grobj *tesla,
	     struct nv50_vbo_emitctx *emit, uint32_t index)
{
	unsigned i = emit->ve_edgeflag;

	if (i < 16) {
		float f = *((float *)(emit->map[i] + index * emit->stride[i]));

		if (emit->edgeflag != f) {
			emit->edgeflag = f;

			/* 0x15e4: edgeflag method (numeric, no define) */
			BEGIN_RING(chan, tesla, 0x15e4, 1);
			OUT_RING  (chan, f ? 1 : 0);
		}
	}
}
967
/* Push a linear vertex range through the FIFO as inline VERTEX_DATA. */
static boolean
nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, start) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* with an edgeflag attribute vtx_max is 1, so checking
		 * vertex 0 of each packet covers every vertex */
		set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx_next(chan, &emit);

		count -= nr;
	}
	nv50_unmap_vbufs(nv50);

	return TRUE;
}
994
/* Push vertices selected by 32-bit indices through the FIFO. */
static boolean
nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* nr is 1 when an edgeflag attribute is present, so this
		 * covers each emitted vertex */
		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}
	nv50_unmap_vbufs(nv50);

	return TRUE;
}
1021
/* Push vertices selected by 16-bit indices through the FIFO. */
static boolean
nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* nr is 1 when an edgeflag attribute is present, so this
		 * covers each emitted vertex */
		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}
	nv50_unmap_vbufs(nv50);

	return TRUE;
}
1048
/* Push vertices selected by 8-bit indices through the FIFO. */
static boolean
nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* nr is 1 when an edgeflag attribute is present, so this
		 * covers each emitted vertex */
		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}
	nv50_unmap_vbufs(nv50);

	return TRUE;
}