nv: Use util_format_name().
[mesa.git] / src / gallium / drivers / nv50 / nv50_vbo.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "util/u_inlines.h"
26 #include "util/u_format.h"
27
28 #include "nv50_context.h"
29
30 static boolean
31 nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
32
33 static boolean
34 nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
35
36 static boolean
37 nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
38
39 static boolean
40 nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
41
42 #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
43
44 static INLINE unsigned
45 nv50_prim(unsigned mode)
46 {
47 switch (mode) {
48 case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
49 case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
50 case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
51 case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
52 case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
53 case PIPE_PRIM_TRIANGLE_STRIP:
54 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
55 case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
56 case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
57 case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
58 case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
59 case PIPE_PRIM_LINES_ADJACENCY:
60 return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
61 case PIPE_PRIM_LINE_STRIP_ADJACENCY:
62 return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
63 case PIPE_PRIM_TRIANGLES_ADJACENCY:
64 return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
65 case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
66 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
67 default:
68 break;
69 }
70
71 NOUVEAU_ERR("invalid primitive type %d\n", mode);
72 return NV50TCL_VERTEX_BEGIN_POINTS;
73 }
74
75 static INLINE uint32_t
76 nv50_vbo_type_to_hw(enum pipe_format format)
77 {
78 const struct util_format_description *desc;
79
80 desc = util_format_description(format);
81 assert(desc);
82
83 switch (desc->channel[0].type) {
84 case UTIL_FORMAT_TYPE_FLOAT:
85 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
86 case UTIL_FORMAT_TYPE_UNSIGNED:
87 if (desc->channel[0].normalized) {
88 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
89 }
90 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
91 case UTIL_FORMAT_TYPE_SIGNED:
92 if (desc->channel[0].normalized) {
93 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
94 }
95 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
96 /*
97 case PIPE_FORMAT_TYPE_UINT:
98 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
99 case PIPE_FORMAT_TYPE_SINT:
100 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
101 default:
102 return 0;
103 }
104 }
105
106 static INLINE uint32_t
107 nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
108 {
109 static const uint32_t hw_values[] = {
110 0, 0, 0, 0,
111 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
112 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
113 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
114 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
115 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
116 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
117 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
118 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
119 0, 0, 0, 0,
120 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
121 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
122 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
123 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
124
125 /* we'd also have R11G11B10 and R10G10B10A2 */
126
127 assert(nr_c > 0 && nr_c <= 4);
128
129 if (size > 32)
130 return 0;
131 size >>= (3 - 2);
132
133 return hw_values[size + (nr_c - 1)];
134 }
135
136 static INLINE uint32_t
137 nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
138 {
139 uint32_t hw_type, hw_size;
140 enum pipe_format pf = ve->src_format;
141 const struct util_format_description *desc;
142 unsigned size;
143
144 desc = util_format_description(pf);
145 assert(desc);
146
147 size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
148
149 hw_type = nv50_vbo_type_to_hw(pf);
150 hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
151
152 if (!hw_type || !hw_size) {
153 NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
154 abort();
155 return 0x24e80000;
156 }
157
158 if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
159 hw_size |= (1 << 31); /* no real swizzle bits :-( */
160
161 return (hw_type | hw_size);
162 }
163
164 /* For instanced drawing from user buffers, hitting the FIFO repeatedly
165 * with the same vertex data is probably worse than uploading all data.
166 */
167 static boolean
168 nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
169 {
170 struct nv50_screen *nscreen = nv50->screen;
171 struct pipe_screen *pscreen = &nscreen->base.base;
172 struct pipe_buffer *buf = nscreen->strm_vbuf[i];
173 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
174 uint8_t *src;
175 unsigned size = align(vb->buffer->size, 4096);
176
177 if (buf && buf->size < size)
178 pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
179
180 if (!nscreen->strm_vbuf[i]) {
181 nscreen->strm_vbuf[i] = pipe_buffer_create(
182 pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
183 buf = nscreen->strm_vbuf[i];
184 }
185
186 src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
187 if (!src)
188 return FALSE;
189 src += vb->buffer_offset;
190
191 size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
192 if (vb->buffer_offset + size > vb->buffer->size)
193 size = vb->buffer->size - vb->buffer_offset;
194
195 pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
196 pipe_buffer_unmap(pscreen, vb->buffer);
197
198 vb->buffer = buf; /* don't pipe_reference, this is a private copy */
199 return TRUE;
200 }
201
202 static void
203 nv50_upload_user_vbufs(struct nv50_context *nv50)
204 {
205 unsigned i;
206
207 if (nv50->vbo_fifo)
208 nv50->dirty |= NV50_NEW_ARRAYS;
209 if (!(nv50->dirty & NV50_NEW_ARRAYS))
210 return;
211
212 for (i = 0; i < nv50->vtxbuf_nr; ++i) {
213 if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
214 continue;
215 nv50_upload_vtxbuf(nv50, i);
216 }
217 }
218
219 static void
220 nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
221 {
222 struct nouveau_grobj *tesla = nv50->screen->tesla;
223 struct nouveau_channel *chan = tesla->channel;
224 float v[4];
225
226 util_format_read_4f(nv50->vtxelt[i].src_format,
227 v, 0, data, 0, 0, 0, 1, 1);
228
229 switch (nv50->vtxelt[i].nr_components) {
230 case 4:
231 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
232 OUT_RINGf (chan, v[0]);
233 OUT_RINGf (chan, v[1]);
234 OUT_RINGf (chan, v[2]);
235 OUT_RINGf (chan, v[3]);
236 break;
237 case 3:
238 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
239 OUT_RINGf (chan, v[0]);
240 OUT_RINGf (chan, v[1]);
241 OUT_RINGf (chan, v[2]);
242 break;
243 case 2:
244 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
245 OUT_RINGf (chan, v[0]);
246 OUT_RINGf (chan, v[1]);
247 break;
248 case 1:
249 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
250 OUT_RINGf (chan, v[0]);
251 break;
252 default:
253 assert(0);
254 break;
255 }
256 }
257
258 static unsigned
259 init_per_instance_arrays_immd(struct nv50_context *nv50,
260 unsigned startInstance,
261 unsigned pos[16], unsigned step[16])
262 {
263 struct nouveau_bo *bo;
264 unsigned i, b, count = 0;
265
266 for (i = 0; i < nv50->vtxelt_nr; ++i) {
267 if (!nv50->vtxelt[i].instance_divisor)
268 continue;
269 ++count;
270 b = nv50->vtxelt[i].vertex_buffer_index;
271
272 pos[i] = nv50->vtxelt[i].src_offset +
273 nv50->vtxbuf[b].buffer_offset +
274 startInstance * nv50->vtxbuf[b].stride;
275 step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
276
277 bo = nouveau_bo(nv50->vtxbuf[b].buffer);
278 if (!bo->map)
279 nouveau_bo_map(bo, NOUVEAU_BO_RD);
280
281 nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
282 }
283
284 return count;
285 }
286
287 static unsigned
288 init_per_instance_arrays(struct nv50_context *nv50,
289 unsigned startInstance,
290 unsigned pos[16], unsigned step[16])
291 {
292 struct nouveau_grobj *tesla = nv50->screen->tesla;
293 struct nouveau_channel *chan = tesla->channel;
294 struct nouveau_bo *bo;
295 struct nouveau_stateobj *so;
296 unsigned i, b, count = 0;
297 const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
298
299 if (nv50->vbo_fifo)
300 return init_per_instance_arrays_immd(nv50, startInstance,
301 pos, step);
302
303 so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
304
305 for (i = 0; i < nv50->vtxelt_nr; ++i) {
306 if (!nv50->vtxelt[i].instance_divisor)
307 continue;
308 ++count;
309 b = nv50->vtxelt[i].vertex_buffer_index;
310
311 pos[i] = nv50->vtxelt[i].src_offset +
312 nv50->vtxbuf[b].buffer_offset +
313 startInstance * nv50->vtxbuf[b].stride;
314
315 if (!startInstance) {
316 step[i] = 0;
317 continue;
318 }
319 step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
320
321 bo = nouveau_bo(nv50->vtxbuf[b].buffer);
322
323 so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
324 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
325 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
326 }
327
328 if (count && startInstance) {
329 so_ref (so, &nv50->state.instbuf); /* for flush notify */
330 so_emit(chan, nv50->state.instbuf);
331 }
332 so_ref (NULL, &so);
333
334 return count;
335 }
336
337 static void
338 step_per_instance_arrays_immd(struct nv50_context *nv50,
339 unsigned pos[16], unsigned step[16])
340 {
341 struct nouveau_bo *bo;
342 unsigned i, b;
343
344 for (i = 0; i < nv50->vtxelt_nr; ++i) {
345 if (!nv50->vtxelt[i].instance_divisor)
346 continue;
347 if (++step[i] != nv50->vtxelt[i].instance_divisor)
348 continue;
349 b = nv50->vtxelt[i].vertex_buffer_index;
350 bo = nouveau_bo(nv50->vtxbuf[b].buffer);
351
352 step[i] = 0;
353 pos[i] += nv50->vtxbuf[b].stride;
354
355 nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
356 }
357 }
358
359 static void
360 step_per_instance_arrays(struct nv50_context *nv50,
361 unsigned pos[16], unsigned step[16])
362 {
363 struct nouveau_grobj *tesla = nv50->screen->tesla;
364 struct nouveau_channel *chan = tesla->channel;
365 struct nouveau_bo *bo;
366 struct nouveau_stateobj *so;
367 unsigned i, b;
368 const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
369
370 if (nv50->vbo_fifo) {
371 step_per_instance_arrays_immd(nv50, pos, step);
372 return;
373 }
374
375 so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
376
377 for (i = 0; i < nv50->vtxelt_nr; ++i) {
378 if (!nv50->vtxelt[i].instance_divisor)
379 continue;
380 b = nv50->vtxelt[i].vertex_buffer_index;
381
382 if (++step[i] == nv50->vtxelt[i].instance_divisor) {
383 step[i] = 0;
384 pos[i] += nv50->vtxbuf[b].stride;
385 }
386
387 bo = nouveau_bo(nv50->vtxbuf[b].buffer);
388
389 so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
390 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
391 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
392 }
393
394 so_ref (so, &nv50->state.instbuf); /* for flush notify */
395 so_ref (NULL, &so);
396
397 so_emit(chan, nv50->state.instbuf);
398 }
399
400 static INLINE void
401 nv50_unmap_vbufs(struct nv50_context *nv50)
402 {
403 unsigned i;
404
405 for (i = 0; i < nv50->vtxbuf_nr; ++i)
406 if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
407 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
408 }
409
410 void
411 nv50_draw_arrays_instanced(struct pipe_context *pipe,
412 unsigned mode, unsigned start, unsigned count,
413 unsigned startInstance, unsigned instanceCount)
414 {
415 struct nv50_context *nv50 = nv50_context(pipe);
416 struct nouveau_channel *chan = nv50->screen->tesla->channel;
417 struct nouveau_grobj *tesla = nv50->screen->tesla;
418 unsigned i, nz_divisors;
419 unsigned step[16], pos[16];
420
421 if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
422 nv50_upload_user_vbufs(nv50);
423
424 nv50_state_validate(nv50);
425
426 nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
427
428 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
429 OUT_RING (chan, NV50_CB_AUX | (24 << 8));
430 OUT_RING (chan, startInstance);
431
432 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
433 OUT_RING (chan, nv50_prim(mode));
434
435 if (nv50->vbo_fifo)
436 nv50_push_arrays(nv50, start, count);
437 else {
438 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
439 OUT_RING (chan, start);
440 OUT_RING (chan, count);
441 }
442 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
443 OUT_RING (chan, 0);
444
445 for (i = 1; i < instanceCount; i++) {
446 if (nz_divisors) /* any non-zero array divisors ? */
447 step_per_instance_arrays(nv50, pos, step);
448
449 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
450 OUT_RING (chan, nv50_prim(mode) | (1 << 28));
451
452 if (nv50->vbo_fifo)
453 nv50_push_arrays(nv50, start, count);
454 else {
455 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
456 OUT_RING (chan, start);
457 OUT_RING (chan, count);
458 }
459 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
460 OUT_RING (chan, 0);
461 }
462 nv50_unmap_vbufs(nv50);
463
464 so_ref(NULL, &nv50->state.instbuf);
465 }
466
467 void
468 nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
469 unsigned count)
470 {
471 struct nv50_context *nv50 = nv50_context(pipe);
472 struct nouveau_channel *chan = nv50->screen->tesla->channel;
473 struct nouveau_grobj *tesla = nv50->screen->tesla;
474 boolean ret;
475
476 nv50_state_validate(nv50);
477
478 BEGIN_RING(chan, tesla, 0x142c, 1);
479 OUT_RING (chan, 0);
480 BEGIN_RING(chan, tesla, 0x142c, 1);
481 OUT_RING (chan, 0);
482
483 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
484 OUT_RING (chan, nv50_prim(mode));
485
486 if (nv50->vbo_fifo)
487 ret = nv50_push_arrays(nv50, start, count);
488 else {
489 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
490 OUT_RING (chan, start);
491 OUT_RING (chan, count);
492 ret = TRUE;
493 }
494 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
495 OUT_RING (chan, 0);
496
497 nv50_unmap_vbufs(nv50);
498
499 /* XXX: not sure what to do if ret != TRUE: flush and retry?
500 */
501 assert(ret);
502 }
503
504 static INLINE boolean
505 nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
506 unsigned start, unsigned count)
507 {
508 struct nouveau_channel *chan = nv50->screen->tesla->channel;
509 struct nouveau_grobj *tesla = nv50->screen->tesla;
510
511 map += start;
512
513 if (nv50->vbo_fifo)
514 return nv50_push_elements_u08(nv50, map, count);
515
516 if (count & 1) {
517 BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
518 OUT_RING (chan, map[0]);
519 map++;
520 count--;
521 }
522
523 while (count) {
524 unsigned nr = count > 2046 ? 2046 : count;
525 int i;
526
527 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
528 for (i = 0; i < nr; i += 2)
529 OUT_RING (chan, (map[i + 1] << 16) | map[i]);
530
531 count -= nr;
532 map += nr;
533 }
534 return TRUE;
535 }
536
537 static INLINE boolean
538 nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
539 unsigned start, unsigned count)
540 {
541 struct nouveau_channel *chan = nv50->screen->tesla->channel;
542 struct nouveau_grobj *tesla = nv50->screen->tesla;
543
544 map += start;
545
546 if (nv50->vbo_fifo)
547 return nv50_push_elements_u16(nv50, map, count);
548
549 if (count & 1) {
550 BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
551 OUT_RING (chan, map[0]);
552 map++;
553 count--;
554 }
555
556 while (count) {
557 unsigned nr = count > 2046 ? 2046 : count;
558 int i;
559
560 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
561 for (i = 0; i < nr; i += 2)
562 OUT_RING (chan, (map[i + 1] << 16) | map[i]);
563
564 count -= nr;
565 map += nr;
566 }
567 return TRUE;
568 }
569
570 static INLINE boolean
571 nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
572 unsigned start, unsigned count)
573 {
574 struct nouveau_channel *chan = nv50->screen->tesla->channel;
575 struct nouveau_grobj *tesla = nv50->screen->tesla;
576
577 map += start;
578
579 if (nv50->vbo_fifo)
580 return nv50_push_elements_u32(nv50, map, count);
581
582 while (count) {
583 unsigned nr = count > 2047 ? 2047 : count;
584
585 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
586 OUT_RINGp (chan, map, nr);
587
588 count -= nr;
589 map += nr;
590 }
591 return TRUE;
592 }
593
594 static INLINE void
595 nv50_draw_elements_inline(struct nv50_context *nv50,
596 void *map, unsigned indexSize,
597 unsigned start, unsigned count)
598 {
599 switch (indexSize) {
600 case 1:
601 nv50_draw_elements_inline_u08(nv50, map, start, count);
602 break;
603 case 2:
604 nv50_draw_elements_inline_u16(nv50, map, start, count);
605 break;
606 case 4:
607 nv50_draw_elements_inline_u32(nv50, map, start, count);
608 break;
609 }
610 }
611
612 void
613 nv50_draw_elements_instanced(struct pipe_context *pipe,
614 struct pipe_buffer *indexBuffer,
615 unsigned indexSize,
616 unsigned mode, unsigned start, unsigned count,
617 unsigned startInstance, unsigned instanceCount)
618 {
619 struct nv50_context *nv50 = nv50_context(pipe);
620 struct nouveau_grobj *tesla = nv50->screen->tesla;
621 struct nouveau_channel *chan = tesla->channel;
622 struct pipe_screen *pscreen = pipe->screen;
623 void *map;
624 unsigned i, nz_divisors;
625 unsigned step[16], pos[16];
626
627 map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
628
629 if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
630 nv50_upload_user_vbufs(nv50);
631
632 nv50_state_validate(nv50);
633
634 nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
635
636 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
637 OUT_RING (chan, NV50_CB_AUX | (24 << 8));
638 OUT_RING (chan, startInstance);
639
640 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
641 OUT_RING (chan, nv50_prim(mode));
642
643 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
644
645 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
646 OUT_RING (chan, 0);
647
648 for (i = 1; i < instanceCount; ++i) {
649 if (nz_divisors) /* any non-zero array divisors ? */
650 step_per_instance_arrays(nv50, pos, step);
651
652 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
653 OUT_RING (chan, nv50_prim(mode) | (1 << 28));
654
655 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
656
657 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
658 OUT_RING (chan, 0);
659 }
660 nv50_unmap_vbufs(nv50);
661
662 so_ref(NULL, &nv50->state.instbuf);
663 }
664
665 void
666 nv50_draw_elements(struct pipe_context *pipe,
667 struct pipe_buffer *indexBuffer, unsigned indexSize,
668 unsigned mode, unsigned start, unsigned count)
669 {
670 struct nv50_context *nv50 = nv50_context(pipe);
671 struct nouveau_channel *chan = nv50->screen->tesla->channel;
672 struct nouveau_grobj *tesla = nv50->screen->tesla;
673 struct pipe_screen *pscreen = pipe->screen;
674 void *map;
675
676 map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
677
678 nv50_state_validate(nv50);
679
680 BEGIN_RING(chan, tesla, 0x142c, 1);
681 OUT_RING (chan, 0);
682 BEGIN_RING(chan, tesla, 0x142c, 1);
683 OUT_RING (chan, 0);
684
685 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
686 OUT_RING (chan, nv50_prim(mode));
687
688 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
689
690 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
691 OUT_RING (chan, 0);
692
693 nv50_unmap_vbufs(nv50);
694
695 pipe_buffer_unmap(pscreen, indexBuffer);
696 }
697
698 static INLINE boolean
699 nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
700 struct nouveau_stateobj **pso,
701 struct pipe_vertex_element *ve,
702 struct pipe_vertex_buffer *vb)
703
704 {
705 struct nouveau_stateobj *so;
706 struct nouveau_grobj *tesla = nv50->screen->tesla;
707 struct nouveau_bo *bo = nouveau_bo(vb->buffer);
708 float v[4];
709 int ret;
710
711 ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
712 if (ret)
713 return FALSE;
714
715 util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
716 (vb->buffer_offset + ve->src_offset), 0,
717 0, 0, 1, 1);
718 so = *pso;
719 if (!so)
720 *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
721
722 switch (ve->nr_components) {
723 case 4:
724 so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
725 so_data (so, fui(v[0]));
726 so_data (so, fui(v[1]));
727 so_data (so, fui(v[2]));
728 so_data (so, fui(v[3]));
729 break;
730 case 3:
731 so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
732 so_data (so, fui(v[0]));
733 so_data (so, fui(v[1]));
734 so_data (so, fui(v[2]));
735 break;
736 case 2:
737 so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
738 so_data (so, fui(v[0]));
739 so_data (so, fui(v[1]));
740 break;
741 case 1:
742 if (attrib == nv50->vertprog->cfg.edgeflag_in) {
743 so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
744 so_data (so, v[0] ? 1 : 0);
745 }
746 so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
747 so_data (so, fui(v[0]));
748 break;
749 default:
750 nouveau_bo_unmap(bo);
751 return FALSE;
752 }
753
754 nouveau_bo_unmap(bo);
755 return TRUE;
756 }
757
758 void
759 nv50_vbo_validate(struct nv50_context *nv50)
760 {
761 struct nouveau_grobj *tesla = nv50->screen->tesla;
762 struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
763 unsigned i, n_ve;
764
765 /* don't validate if Gallium took away our buffers */
766 if (nv50->vtxbuf_nr == 0)
767 return;
768 nv50->vbo_fifo = 0;
769
770 for (i = 0; i < nv50->vtxbuf_nr; ++i)
771 if (nv50->vtxbuf[i].stride &&
772 !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
773 nv50->vbo_fifo = 0xffff;
774
775 if (NV50_USING_LOATHED_EDGEFLAG(nv50))
776 nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
777
778 n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
779
780 vtxattr = NULL;
781 vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
782 vtxfmt = so_new(1, n_ve, 0);
783 so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
784
785 for (i = 0; i < nv50->vtxelt_nr; i++) {
786 struct pipe_vertex_element *ve = &nv50->vtxelt[i];
787 struct pipe_vertex_buffer *vb =
788 &nv50->vtxbuf[ve->vertex_buffer_index];
789 struct nouveau_bo *bo = nouveau_bo(vb->buffer);
790 uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);
791
792 if (!vb->stride &&
793 nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
794 so_data(vtxfmt, hw | (1 << 4));
795
796 so_method(vtxbuf, tesla,
797 NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
798 so_data (vtxbuf, 0);
799
800 nv50->vbo_fifo &= ~(1 << i);
801 continue;
802 }
803
804 if (nv50->vbo_fifo) {
805 so_data (vtxfmt, hw |
806 (ve->instance_divisor ? (1 << 4) : i));
807 so_method(vtxbuf, tesla,
808 NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
809 so_data (vtxbuf, 0);
810 continue;
811 }
812 so_data(vtxfmt, hw | i);
813
814 so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
815 so_data (vtxbuf, 0x20000000 |
816 (ve->instance_divisor ? 0 : vb->stride));
817 so_reloc (vtxbuf, bo, vb->buffer_offset +
818 ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
819 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
820 so_reloc (vtxbuf, bo, vb->buffer_offset +
821 ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
822 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
823
824 /* vertex array limits */
825 so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
826 so_reloc (vtxbuf, bo, vb->buffer->size - 1,
827 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
828 NOUVEAU_BO_HIGH, 0, 0);
829 so_reloc (vtxbuf, bo, vb->buffer->size - 1,
830 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
831 NOUVEAU_BO_LOW, 0, 0);
832 }
833 for (; i < n_ve; ++i) {
834 so_data (vtxfmt, 0x7e080010);
835
836 so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
837 so_data (vtxbuf, 0);
838 }
839 nv50->state.vtxelt_nr = nv50->vtxelt_nr;
840
841 so_ref (vtxfmt, &nv50->state.vtxfmt);
842 so_ref (vtxbuf, &nv50->state.vtxbuf);
843 so_ref (vtxattr, &nv50->state.vtxattr);
844 so_ref (NULL, &vtxbuf);
845 so_ref (NULL, &vtxfmt);
846 so_ref (NULL, &vtxattr);
847 }
848
/* Writes one vertex element, read from the given pointer, to a channel. */
typedef void (*pfn_push)(struct nouveau_channel *, void *);

/* State used while pushing vertex data through the FIFO; filled in by
 * emit_prepare().  The arrays are indexed by pushed-element slot.
 */
struct nv50_vbo_emitctx {
	pfn_push push[16];	/* per-slot emit function */
	uint8_t *map[16];	/* per-slot source pointer */
	unsigned stride[16];	/* per-slot vertex buffer stride */
	unsigned nr_ve;		/* number of slots in use */
	unsigned vtx_dwords;	/* dwords pushed per vertex */
	unsigned vtx_max;	/* max vertices per VERTEX_DATA packet */

	float edgeflag;		/* last edge flag value seen */
	unsigned ve_edgeflag;	/* vertprog edgeflag_in; >= 16 means unused */
};
863
864 static INLINE void
865 emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
866 {
867 unsigned i;
868
869 for (i = 0; i < emit->nr_ve; ++i) {
870 emit->push[i](chan, emit->map[i]);
871 emit->map[i] += emit->stride[i];
872 }
873 }
874
875 static INLINE void
876 emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
877 uint32_t vi)
878 {
879 unsigned i;
880
881 for (i = 0; i < emit->nr_ve; ++i)
882 emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
883 }
884
885 static INLINE boolean
886 nv50_map_vbufs(struct nv50_context *nv50)
887 {
888 int i;
889
890 for (i = 0; i < nv50->vtxbuf_nr; ++i) {
891 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
892 unsigned size = vb->stride * (vb->max_index + 1) + 16;
893
894 if (nouveau_bo(vb->buffer)->map)
895 continue;
896
897 size = vb->stride * (vb->max_index + 1) + 16;
898 size = MIN2(size, vb->buffer->size);
899 if (!size)
900 size = vb->buffer->size;
901
902 if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
903 0, size, NOUVEAU_BO_RD))
904 break;
905 }
906
907 if (i == nv50->vtxbuf_nr)
908 return TRUE;
909 for (; i >= 0; --i)
910 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
911 return FALSE;
912 }
913
/* emit_b32_N: push N consecutive 32-bit words read from `data`. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
	uint32_t *words = data;

	OUT_RING(chan, words[0]);
}

static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
	uint32_t *words = data;
	unsigned k;

	for (k = 0; k < 2; ++k)
		OUT_RING(chan, words[k]);
}

static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
	uint32_t *words = data;
	unsigned k;

	for (k = 0; k < 3; ++k)
		OUT_RING(chan, words[k]);
}

static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
	uint32_t *words = data;
	unsigned k;

	for (k = 0; k < 4; ++k)
		OUT_RING(chan, words[k]);
}
951
/* Push a single 16-bit value, zero-extended to one dword. */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
	uint16_t *v = data;

	OUT_RING(chan, v[0]);
}

/* Push three 16-bit values as two dwords: the first two packed
 * low/high into one dword, the third alone in the next.
 */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
	uint16_t *v = data;

	/* Widen before shifting: v[1] promotes to int, and shifting a
	 * value >= 0x8000 left by 16 would overflow it.
	 */
	OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]);
	OUT_RING(chan, v[2]);
}
968
/* Push a single byte, zero-extended to one dword. */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
	uint8_t *bytes = data;

	OUT_RING(chan, bytes[0]);
}

/* Push three bytes packed into the low 24 bits of one dword, first
 * byte lowest.
 */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
	uint8_t *bytes = data;
	const uint8_t b0 = bytes[0], b1 = bytes[1], b2 = bytes[2];

	OUT_RING(chan, (b2 << 16) | (b1 << 8) | b0);
}
984
985 static boolean
986 emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
987 unsigned start)
988 {
989 unsigned i;
990
991 if (nv50_map_vbufs(nv50) == FALSE)
992 return FALSE;
993
994 emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
995
996 emit->edgeflag = 0.5f;
997 emit->nr_ve = 0;
998 emit->vtx_dwords = 0;
999
1000 for (i = 0; i < nv50->vtxelt_nr; ++i) {
1001 struct pipe_vertex_element *ve;
1002 struct pipe_vertex_buffer *vb;
1003 unsigned n, size;
1004 const struct util_format_description *desc;
1005
1006 ve = &nv50->vtxelt[i];
1007 vb = &nv50->vtxbuf[ve->vertex_buffer_index];
1008 if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
1009 continue;
1010 n = emit->nr_ve++;
1011
1012 emit->stride[n] = vb->stride;
1013 emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
1014 vb->buffer_offset +
1015 (start * vb->stride + ve->src_offset);
1016
1017 desc = util_format_description(ve->src_format);
1018 assert(desc);
1019
1020 size = util_format_get_component_bits(
1021 ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
1022
1023 assert(ve->nr_components > 0 && ve->nr_components <= 4);
1024
1025 /* It shouldn't be necessary to push the implicit 1s
1026 * for case 3 and size 8 cases 1, 2, 3.
1027 */
1028 switch (size) {
1029 default:
1030 NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
1031 return FALSE;
1032 case 32:
1033 switch (ve->nr_components) {
1034 case 1: emit->push[n] = emit_b32_1; break;
1035 case 2: emit->push[n] = emit_b32_2; break;
1036 case 3: emit->push[n] = emit_b32_3; break;
1037 case 4: emit->push[n] = emit_b32_4; break;
1038 }
1039 emit->vtx_dwords += ve->nr_components;
1040 break;
1041 case 16:
1042 switch (ve->nr_components) {
1043 case 1: emit->push[n] = emit_b16_1; break;
1044 case 2: emit->push[n] = emit_b32_1; break;
1045 case 3: emit->push[n] = emit_b16_3; break;
1046 case 4: emit->push[n] = emit_b32_2; break;
1047 }
1048 emit->vtx_dwords += (ve->nr_components + 1) >> 1;
1049 break;
1050 case 8:
1051 switch (ve->nr_components) {
1052 case 1: emit->push[n] = emit_b08_1; break;
1053 case 2: emit->push[n] = emit_b16_1; break;
1054 case 3: emit->push[n] = emit_b08_3; break;
1055 case 4: emit->push[n] = emit_b32_1; break;
1056 }
1057 emit->vtx_dwords += 1;
1058 break;
1059 }
1060 }
1061
1062 emit->vtx_max = 512 / emit->vtx_dwords;
1063 if (emit->ve_edgeflag < 16)
1064 emit->vtx_max = 1;
1065
1066 return TRUE;
1067 }
1068
1069 static INLINE void
1070 set_edgeflag(struct nouveau_channel *chan,
1071 struct nouveau_grobj *tesla,
1072 struct nv50_vbo_emitctx *emit, uint32_t index)
1073 {
1074 unsigned i = emit->ve_edgeflag;
1075
1076 if (i < 16) {
1077 float f = *((float *)(emit->map[i] + index * emit->stride[i]));
1078
1079 if (emit->edgeflag != f) {
1080 emit->edgeflag = f;
1081
1082 BEGIN_RING(chan, tesla, 0x15e4, 1);
1083 OUT_RING (chan, f ? 1 : 0);
1084 }
1085 }
1086 }
1087
1088 static boolean
1089 nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
1090 {
1091 struct nouveau_channel *chan = nv50->screen->base.channel;
1092 struct nouveau_grobj *tesla = nv50->screen->tesla;
1093 struct nv50_vbo_emitctx emit;
1094
1095 if (emit_prepare(nv50, &emit, start) == FALSE)
1096 return FALSE;
1097
1098 while (count) {
1099 unsigned i, dw, nr = MIN2(count, emit.vtx_max);
1100 dw = nr * emit.vtx_dwords;
1101
1102 set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
1103
1104 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
1105 for (i = 0; i < nr; ++i)
1106 emit_vtx_next(chan, &emit);
1107
1108 count -= nr;
1109 }
1110
1111 return TRUE;
1112 }
1113
1114 static boolean
1115 nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
1116 {
1117 struct nouveau_channel *chan = nv50->screen->base.channel;
1118 struct nouveau_grobj *tesla = nv50->screen->tesla;
1119 struct nv50_vbo_emitctx emit;
1120
1121 if (emit_prepare(nv50, &emit, 0) == FALSE)
1122 return FALSE;
1123
1124 while (count) {
1125 unsigned i, dw, nr = MIN2(count, emit.vtx_max);
1126 dw = nr * emit.vtx_dwords;
1127
1128 set_edgeflag(chan, tesla, &emit, *map);
1129
1130 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
1131 for (i = 0; i < nr; ++i)
1132 emit_vtx(chan, &emit, *map++);
1133
1134 count -= nr;
1135 }
1136
1137 return TRUE;
1138 }
1139
1140 static boolean
1141 nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
1142 {
1143 struct nouveau_channel *chan = nv50->screen->base.channel;
1144 struct nouveau_grobj *tesla = nv50->screen->tesla;
1145 struct nv50_vbo_emitctx emit;
1146
1147 if (emit_prepare(nv50, &emit, 0) == FALSE)
1148 return FALSE;
1149
1150 while (count) {
1151 unsigned i, dw, nr = MIN2(count, emit.vtx_max);
1152 dw = nr * emit.vtx_dwords;
1153
1154 set_edgeflag(chan, tesla, &emit, *map);
1155
1156 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
1157 for (i = 0; i < nr; ++i)
1158 emit_vtx(chan, &emit, *map++);
1159
1160 count -= nr;
1161 }
1162
1163 return TRUE;
1164 }
1165
1166 static boolean
1167 nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
1168 {
1169 struct nouveau_channel *chan = nv50->screen->base.channel;
1170 struct nouveau_grobj *tesla = nv50->screen->tesla;
1171 struct nv50_vbo_emitctx emit;
1172
1173 if (emit_prepare(nv50, &emit, 0) == FALSE)
1174 return FALSE;
1175
1176 while (count) {
1177 unsigned i, dw, nr = MIN2(count, emit.vtx_max);
1178 dw = nr * emit.vtx_dwords;
1179
1180 set_edgeflag(chan, tesla, &emit, *map);
1181
1182 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
1183 for (i = 0; i < nr; ++i)
1184 emit_vtx(chan, &emit, *map++);
1185
1186 count -= nr;
1187 }
1188
1189 return TRUE;
1190 }