nv50: fix constant vtxattr methods
[mesa.git] / src / gallium / drivers / nv50 / nv50_vbo.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "pipe/p_inlines.h"
26
27 #include "util/u_format.h"
28
29 #include "nv50_context.h"
30
/* Forward declarations for the immediate-mode (FIFO vertex push) drawing
 * paths implemented at the bottom of this file.
 */
static boolean
nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);

static boolean
nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);

static boolean
nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);

static boolean
nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
42
/* True if the vertex program consumes an edge flag attribute (input slot
 * < 16); the vertprog can't set the edge flag itself (see nv50_vbo_validate),
 * so drawing then falls back to pushing vertices through the FIFO.
 */
#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
44
45 static INLINE unsigned
46 nv50_prim(unsigned mode)
47 {
48 switch (mode) {
49 case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
50 case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
51 case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
52 case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
53 case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
54 case PIPE_PRIM_TRIANGLE_STRIP:
55 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
56 case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
57 case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
58 case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
59 case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
60 case PIPE_PRIM_LINES_ADJACENCY:
61 return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
62 case PIPE_PRIM_LINE_STRIP_ADJACENCY:
63 return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
64 case PIPE_PRIM_TRIANGLES_ADJACENCY:
65 return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
66 case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
67 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
68 default:
69 break;
70 }
71
72 NOUVEAU_ERR("invalid primitive type %d\n", mode);
73 return NV50TCL_VERTEX_BEGIN_POINTS;
74 }
75
76 static INLINE uint32_t
77 nv50_vbo_type_to_hw(enum pipe_format format)
78 {
79 const struct util_format_description *desc;
80
81 desc = util_format_description(format);
82 assert(desc);
83
84 switch (desc->channel[0].type) {
85 case UTIL_FORMAT_TYPE_FLOAT:
86 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
87 case UTIL_FORMAT_TYPE_UNSIGNED:
88 if (desc->channel[0].normalized) {
89 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
90 }
91 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
92 case UTIL_FORMAT_TYPE_SIGNED:
93 if (desc->channel[0].normalized) {
94 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
95 }
96 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
97 /*
98 case PIPE_FORMAT_TYPE_UINT:
99 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
100 case PIPE_FORMAT_TYPE_SINT:
101 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
102 default:
103 return 0;
104 }
105 }
106
107 static INLINE uint32_t
108 nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
109 {
110 static const uint32_t hw_values[] = {
111 0, 0, 0, 0,
112 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
113 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
114 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
115 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
116 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
117 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
118 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
119 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
120 0, 0, 0, 0,
121 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
122 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
123 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
124 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
125
126 /* we'd also have R11G11B10 and R10G10B10A2 */
127
128 assert(nr_c > 0 && nr_c <= 4);
129
130 if (size > 32)
131 return 0;
132 size >>= (3 - 2);
133
134 return hw_values[size + (nr_c - 1)];
135 }
136
137 static INLINE uint32_t
138 nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
139 {
140 uint32_t hw_type, hw_size;
141 enum pipe_format pf = ve->src_format;
142 const struct util_format_description *desc;
143 unsigned size;
144
145 desc = util_format_description(pf);
146 assert(desc);
147
148 size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
149
150 hw_type = nv50_vbo_type_to_hw(pf);
151 hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
152
153 if (!hw_type || !hw_size) {
154 NOUVEAU_ERR("unsupported vbo format: %s\n", pf_name(pf));
155 abort();
156 return 0x24e80000;
157 }
158
159 if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
160 hw_size |= (1 << 31); /* no real swizzle bits :-( */
161
162 return (hw_type | hw_size);
163 }
164
/* For instanced drawing from user buffers, hitting the FIFO repeatedly
 * with the same vertex data is probably worse than uploading all data.
 */
/* Copy a user (non-VERTEX-usage) vertex buffer into the screen-owned
 * VERTEX buffer strm_vbuf[i] (recreated if too small) and point vtxbuf[i]
 * at the copy.  Returns FALSE if the source cannot be mapped.
 *
 * NOTE(review): pipe_buffer_create failure is not checked; a NULL
 * strm_vbuf[i] would be passed to pipe_buffer_write -- verify.
 */
static boolean
nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
{
	struct nv50_screen *nscreen = nv50->screen;
	struct pipe_screen *pscreen = &nscreen->base.base;
	struct pipe_buffer *buf = nscreen->strm_vbuf[i];
	struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
	uint8_t *src;
	unsigned size = align(vb->buffer->size, 4096);

	/* drop the previous copy if it cannot hold the new data */
	if (buf && buf->size < size)
		pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);

	if (!nscreen->strm_vbuf[i]) {
		nscreen->strm_vbuf[i] = pipe_buffer_create(
			pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
		buf = nscreen->strm_vbuf[i];
	}

	src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
	if (!src)
		return FALSE;
	src += vb->buffer_offset;

	size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
	if (vb->buffer_offset + size > vb->buffer->size)
		size = vb->buffer->size - vb->buffer_offset;

	pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
	pipe_buffer_unmap(pscreen, vb->buffer);

	vb->buffer = buf; /* don't pipe_reference, this is a private copy */
	return TRUE;
}
202
203 static void
204 nv50_upload_user_vbufs(struct nv50_context *nv50)
205 {
206 unsigned i;
207
208 if (nv50->vbo_fifo)
209 nv50->dirty |= NV50_NEW_ARRAYS;
210 if (!(nv50->dirty & NV50_NEW_ARRAYS))
211 return;
212
213 for (i = 0; i < nv50->vtxbuf_nr; ++i) {
214 if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
215 continue;
216 nv50_upload_vtxbuf(nv50, i);
217 }
218 }
219
/* Read one vertex attribute value from memory (converted to 4 floats by
 * util_format_read_4f) and emit it immediately as a constant VTX_ATTR_nF
 * method, sized to the element's component count.
 */
static void
nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	float v[4];

	util_format_read_4f(nv50->vtxelt[i].src_format,
			    v, 0, data, 0, 0, 0, 1, 1);

	switch (nv50->vtxelt[i].nr_components) {
	case 4:
		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
		OUT_RINGf (chan, v[0]);
		OUT_RINGf (chan, v[1]);
		OUT_RINGf (chan, v[2]);
		OUT_RINGf (chan, v[3]);
		break;
	case 3:
		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
		OUT_RINGf (chan, v[0]);
		OUT_RINGf (chan, v[1]);
		OUT_RINGf (chan, v[2]);
		break;
	case 2:
		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
		OUT_RINGf (chan, v[0]);
		OUT_RINGf (chan, v[1]);
		break;
	case 1:
		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
		OUT_RINGf (chan, v[0]);
		break;
	default:
		assert(0);
		break;
	}
}
258
/* Immediate-mode (vbo_fifo) variant of init_per_instance_arrays: for every
 * element with a non-zero instance divisor, compute its byte position and
 * divisor phase for startInstance, and emit its value as a constant vertex
 * attribute.  Returns the number of per-instance elements.
 *
 * NOTE(review): nouveau_bo_map's return value is ignored; bo->map could be
 * NULL on failure -- confirm this cannot happen here.
 * NOTE(review): pos[] advances by startInstance * stride rather than
 * (startInstance / divisor) * stride -- verify for divisors > 1.
 */
static unsigned
init_per_instance_arrays_immd(struct nv50_context *nv50,
			      unsigned startInstance,
			      unsigned pos[16], unsigned step[16])
{
	struct nouveau_bo *bo;
	unsigned i, b, count = 0;

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		if (!nv50->vtxelt[i].instance_divisor)
			continue;
		++count;
		b = nv50->vtxelt[i].vertex_buffer_index;

		pos[i] = nv50->vtxelt[i].src_offset +
			nv50->vtxbuf[b].buffer_offset +
			startInstance * nv50->vtxbuf[b].stride;
		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;

		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
		if (!bo->map)
			nouveau_bo_map(bo, NOUVEAU_BO_RD);

		nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
	}

	return count;
}
287
/* Set up per-instance vertex arrays for the first drawn instance.  In
 * vbo_fifo mode this defers to the immediate variant.  Otherwise it builds
 * a stateobj pointing each per-instance element's VERTEX_ARRAY_START at its
 * position for startInstance; the stateobj is emitted (and kept in
 * state.instbuf for flush notification) only when startInstance != 0, since
 * regular validation already points the arrays at instance 0.
 * Returns the number of elements with a non-zero instance divisor.
 *
 * NOTE(review): pos[] advances by startInstance * stride rather than
 * (startInstance / divisor) * stride -- verify for divisors > 1.
 */
static unsigned
init_per_instance_arrays(struct nv50_context *nv50,
			 unsigned startInstance,
			 unsigned pos[16], unsigned step[16])
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	struct nouveau_bo *bo;
	struct nouveau_stateobj *so;
	unsigned i, b, count = 0;
	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;

	if (nv50->vbo_fifo)
		return init_per_instance_arrays_immd(nv50, startInstance,
						     pos, step);

	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		if (!nv50->vtxelt[i].instance_divisor)
			continue;
		++count;
		b = nv50->vtxelt[i].vertex_buffer_index;

		pos[i] = nv50->vtxelt[i].src_offset +
			nv50->vtxbuf[b].buffer_offset +
			startInstance * nv50->vtxbuf[b].stride;

		/* instance 0: arrays already point at the right place */
		if (!startInstance) {
			step[i] = 0;
			continue;
		}
		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;

		bo = nouveau_bo(nv50->vtxbuf[b].buffer);

		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
	}

	if (count && startInstance) {
		so_ref (so, &nv50->state.instbuf); /* for flush notify */
		so_emit(chan, nv50->state.instbuf);
	}
	so_ref (NULL, &so);

	return count;
}
337
/* Advance per-instance attributes by one instance in immediate mode: when
 * an element's divisor phase wraps, move pos[] to the next array entry and
 * re-emit the value as a constant vertex attribute.  Assumes the buffers
 * were mapped by init_per_instance_arrays_immd.
 */
static void
step_per_instance_arrays_immd(struct nv50_context *nv50,
			      unsigned pos[16], unsigned step[16])
{
	struct nouveau_bo *bo;
	unsigned i, b;

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		if (!nv50->vtxelt[i].instance_divisor)
			continue;
		if (++step[i] != nv50->vtxelt[i].instance_divisor)
			continue;
		b = nv50->vtxelt[i].vertex_buffer_index;
		bo = nouveau_bo(nv50->vtxbuf[b].buffer);

		step[i] = 0;
		pos[i] += nv50->vtxbuf[b].stride;

		nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
	}
}
359
/* Advance per-instance vertex arrays by one instance: build and emit
 * VERTEX_ARRAY_START relocations for every per-instance element, stepping
 * pos[] forward whenever an element's divisor phase wraps.  The stateobj is
 * kept in state.instbuf for flush notification.
 */
static void
step_per_instance_arrays(struct nv50_context *nv50,
			 unsigned pos[16], unsigned step[16])
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	struct nouveau_bo *bo;
	struct nouveau_stateobj *so;
	unsigned i, b;
	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;

	if (nv50->vbo_fifo) {
		step_per_instance_arrays_immd(nv50, pos, step);
		return;
	}

	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		if (!nv50->vtxelt[i].instance_divisor)
			continue;
		b = nv50->vtxelt[i].vertex_buffer_index;

		if (++step[i] == nv50->vtxelt[i].instance_divisor) {
			step[i] = 0;
			pos[i] += nv50->vtxbuf[b].stride;
		}

		bo = nouveau_bo(nv50->vtxbuf[b].buffer);

		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
	}

	so_ref (so, &nv50->state.instbuf); /* for flush notify */
	so_ref (NULL, &so);

	so_emit(chan, nv50->state.instbuf);
}
400
401 static INLINE void
402 nv50_unmap_vbufs(struct nv50_context *nv50)
403 {
404 unsigned i;
405
406 for (i = 0; i < nv50->vtxbuf_nr; ++i)
407 if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
408 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
409 }
410
/* Draw non-indexed geometry instanceCount times starting at startInstance.
 * Per-instance attributes are stepped between instances, either via array
 * start relocations or by re-emitting constant attributes (vbo_fifo mode).
 * The instance ID base is written into the AUX constant buffer for the
 * vertex shader.
 */
void
nv50_draw_arrays_instanced(struct pipe_context *pipe,
			   unsigned mode, unsigned start, unsigned count,
			   unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	unsigned i, nz_divisors;
	unsigned step[16], pos[16];

	/* with the edge flag path, vertices are pushed anyway */
	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
		nv50_upload_user_vbufs(nv50);

	nv50_state_validate(nv50);

	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);

	/* instance ID base in the AUX constant buffer
	 * NOTE(review): (24 << 8) encodes the CB write offset -- confirm
	 * against the NV50_CB_AUX layout
	 */
	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING (chan, startInstance);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING (chan, nv50_prim(mode));

	if (nv50->vbo_fifo)
		nv50_push_arrays(nv50, start, count);
	else {
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING (chan, start);
		OUT_RING (chan, count);
	}
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING (chan, 0);

	for (i = 1; i < instanceCount; i++) {
		if (nz_divisors) /* any non-zero array divisors ? */
			step_per_instance_arrays(nv50, pos, step);

		/* (1 << 28): presumably marks a continuation instance of
		 * the same draw -- confirm against method docs
		 */
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING (chan, nv50_prim(mode) | (1 << 28));

		if (nv50->vbo_fifo)
			nv50_push_arrays(nv50, start, count);
		else {
			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
			OUT_RING (chan, start);
			OUT_RING (chan, count);
		}
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING (chan, 0);
	}
	nv50_unmap_vbufs(nv50);

	so_ref(NULL, &nv50->state.instbuf);
}
467
/* Draw non-indexed, non-instanced geometry, either via hardware vertex
 * fetch or by pushing vertex data inline (vbo_fifo mode).
 */
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
		 unsigned count)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	boolean ret;

	nv50_state_validate(nv50);

	/* NOTE(review): 0x142c is an undocumented method, deliberately
	 * written twice -- looks like a serialization workaround; confirm
	 */
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING (chan, 0);
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING (chan, 0);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING (chan, nv50_prim(mode));

	if (nv50->vbo_fifo)
		ret = nv50_push_arrays(nv50, start, count);
	else {
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING (chan, start);
		OUT_RING (chan, count);
		ret = TRUE;
	}
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING (chan, 0);

	nv50_unmap_vbufs(nv50);

	/* XXX: not sure what to do if ret != TRUE: flush and retry?
	 */
	assert(ret);
}
504
/* Emit 8-bit indices inline into the FIFO: an odd leading index goes out
 * via VB_ELEMENT_U32, the rest are packed pairwise into VB_ELEMENT_U16 in
 * chunks of at most 2046 indices (FIFO method length limit).  Falls back
 * to full vertex push in vbo_fifo mode.
 */
static INLINE boolean
nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	if (nv50->vbo_fifo)
		return nv50_push_elements_u08(nv50, map, count);

	/* make the remaining count even so indices pack in pairs */
	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING (chan, map[0]);
		map++;
		count--;
	}

	while (count) {
		unsigned nr = count > 2046 ? 2046 : count;
		int i;

		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
		for (i = 0; i < nr; i += 2)
			OUT_RING (chan, (map[i + 1] << 16) | map[i]);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
537
/* Emit 16-bit indices inline into the FIFO: an odd leading index goes out
 * via VB_ELEMENT_U32, the rest are packed pairwise into VB_ELEMENT_U16 in
 * chunks of at most 2046 indices.  Falls back to full vertex push in
 * vbo_fifo mode.
 */
static INLINE boolean
nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	if (nv50->vbo_fifo)
		return nv50_push_elements_u16(nv50, map, count);

	/* make the remaining count even so indices pack in pairs */
	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING (chan, map[0]);
		map++;
		count--;
	}

	while (count) {
		unsigned nr = count > 2046 ? 2046 : count;
		int i;

		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
		for (i = 0; i < nr; i += 2)
			OUT_RING (chan, (map[i + 1] << 16) | map[i]);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
570
/* Emit 32-bit indices inline via VB_ELEMENT_U32, one index per dword, in
 * chunks of at most 2047 dwords.  Falls back to full vertex push in
 * vbo_fifo mode.
 */
static INLINE boolean
nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	if (nv50->vbo_fifo)
		return nv50_push_elements_u32(nv50, map, count);

	while (count) {
		unsigned nr = count > 2047 ? 2047 : count;

		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
		OUT_RINGp (chan, map, nr);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
594
595 static INLINE void
596 nv50_draw_elements_inline(struct nv50_context *nv50,
597 void *map, unsigned indexSize,
598 unsigned start, unsigned count)
599 {
600 switch (indexSize) {
601 case 1:
602 nv50_draw_elements_inline_u08(nv50, map, start, count);
603 break;
604 case 2:
605 nv50_draw_elements_inline_u16(nv50, map, start, count);
606 break;
607 case 4:
608 nv50_draw_elements_inline_u32(nv50, map, start, count);
609 break;
610 }
611 }
612
613 void
614 nv50_draw_elements_instanced(struct pipe_context *pipe,
615 struct pipe_buffer *indexBuffer,
616 unsigned indexSize,
617 unsigned mode, unsigned start, unsigned count,
618 unsigned startInstance, unsigned instanceCount)
619 {
620 struct nv50_context *nv50 = nv50_context(pipe);
621 struct nouveau_grobj *tesla = nv50->screen->tesla;
622 struct nouveau_channel *chan = tesla->channel;
623 struct pipe_screen *pscreen = pipe->screen;
624 void *map;
625 unsigned i, nz_divisors;
626 unsigned step[16], pos[16];
627
628 map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
629
630 if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
631 nv50_upload_user_vbufs(nv50);
632
633 nv50_state_validate(nv50);
634
635 nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
636
637 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
638 OUT_RING (chan, NV50_CB_AUX | (24 << 8));
639 OUT_RING (chan, startInstance);
640
641 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
642 OUT_RING (chan, nv50_prim(mode));
643
644 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
645
646 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
647 OUT_RING (chan, 0);
648
649 for (i = 1; i < instanceCount; ++i) {
650 if (nz_divisors) /* any non-zero array divisors ? */
651 step_per_instance_arrays(nv50, pos, step);
652
653 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
654 OUT_RING (chan, nv50_prim(mode) | (1 << 28));
655
656 nv50_draw_elements_inline(nv50, map, indexSize, start, count);
657
658 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
659 OUT_RING (chan, 0);
660 }
661 nv50_unmap_vbufs(nv50);
662
663 so_ref(NULL, &nv50->state.instbuf);
664 }
665
/* Draw indexed, non-instanced geometry; indices are read on the CPU and
 * emitted inline into the FIFO.
 */
void
nv50_draw_elements(struct pipe_context *pipe,
		   struct pipe_buffer *indexBuffer, unsigned indexSize,
		   unsigned mode, unsigned start, unsigned count)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct pipe_screen *pscreen = pipe->screen;
	void *map;

	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);

	nv50_state_validate(nv50);

	/* NOTE(review): undocumented method 0x142c written twice, as in
	 * nv50_draw_arrays -- looks like a serialization workaround
	 */
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING (chan, 0);
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING (chan, 0);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING (chan, nv50_prim(mode));

	nv50_draw_elements_inline(nv50, map, indexSize, start, count);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING (chan, 0);

	nv50_unmap_vbufs(nv50);

	pipe_buffer_unmap(pscreen, indexBuffer);
}
698
/* For a vertex element fed by a zero-stride buffer, read its single value
 * and record it in a stateobj (*pso, created on demand) as a constant
 * VTX_ATTR_nF attribute.  If the element is the vertex program's edge flag
 * input, also records EDGEFLAG_ENABLE.
 * Returns FALSE when the buffer cannot be mapped or the component count is
 * unsupported.
 */
static INLINE boolean
nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
		       struct nouveau_stateobj **pso,
		       struct pipe_vertex_element *ve,
		       struct pipe_vertex_buffer *vb)

{
	struct nouveau_stateobj *so;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_bo *bo = nouveau_bo(vb->buffer);
	float v[4];
	int ret;

	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
	if (ret)
		return FALSE;

	util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
			    (vb->buffer_offset + ve->src_offset), 0,
			    0, 0, 1, 1);
	so = *pso;
	if (!so)
		*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);

	switch (ve->nr_components) {
	case 4:
		so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
		so_data (so, fui(v[0]));
		so_data (so, fui(v[1]));
		so_data (so, fui(v[2]));
		so_data (so, fui(v[3]));
		break;
	case 3:
		so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
		so_data (so, fui(v[0]));
		so_data (so, fui(v[1]));
		so_data (so, fui(v[2]));
		break;
	case 2:
		so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
		so_data (so, fui(v[0]));
		so_data (so, fui(v[1]));
		break;
	case 1:
		/* scalar attribute may double as the edge flag */
		if (attrib == nv50->vertprog->cfg.edgeflag_in) {
			so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
			so_data (so, v[0] ? 1 : 0);
		}
		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
		so_data (so, fui(v[0]));
		break;
	default:
		nouveau_bo_unmap(bo);
		return FALSE;
	}

	nouveau_bo_unmap(bo);
	return TRUE;
}
758
/* Build the vertex array state (vtxfmt / vtxbuf / vtxattr stateobjs) from
 * the currently bound vertex elements and buffers.
 *
 * vbo_fifo is a bitmask of elements that must be pushed through the FIFO
 * instead of being fetched by the hardware: all elements when any strided
 * buffer lacks VERTEX usage (user memory), or when the vertex program reads
 * the edge flag attribute.  Zero-stride (constant) elements are emitted as
 * static attributes and removed from the mask.
 */
void
nv50_vbo_validate(struct nv50_context *nv50)
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
	unsigned i, n_ve;

	/* don't validate if Gallium took away our buffers */
	if (nv50->vtxbuf_nr == 0)
		return;
	nv50->vbo_fifo = 0;

	for (i = 0; i < nv50->vtxbuf_nr; ++i)
		if (nv50->vtxbuf[i].stride &&
		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
			nv50->vbo_fifo = 0xffff;

	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
		nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */

	/* also cover arrays enabled by the previous state, to disable them */
	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);

	vtxattr = NULL;
	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
	vtxfmt = so_new(1, n_ve, 0);
	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);

	for (i = 0; i < nv50->vtxelt_nr; i++) {
		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
		struct pipe_vertex_buffer *vb =
			&nv50->vtxbuf[ve->vertex_buffer_index];
		struct nouveau_bo *bo = nouveau_bo(vb->buffer);
		uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);

		/* zero-stride: emit as constant attribute, array disabled */
		if (!vb->stride &&
		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
			so_data(vtxfmt, hw | (1 << 4));

			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data (vtxbuf, 0);

			nv50->vbo_fifo &= ~(1 << i);
			continue;
		}

		/* FIFO push path: hardware array fetch disabled */
		if (nv50->vbo_fifo) {
			so_data (vtxfmt, hw |
				 (ve->instance_divisor ? (1 << 4) : i));
			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data (vtxbuf, 0);
			continue;
		}
		so_data(vtxfmt, hw | i);

		/* enabled array: stride (0 for per-instance), start address */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
		so_data (vtxbuf, 0x20000000 |
			 (ve->instance_divisor ? 0 : vb->stride));
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);

		/* vertex array limits */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_LOW, 0, 0);
	}
	/* disable arrays that are no longer in use */
	for (; i < n_ve; ++i) {
		so_data (vtxfmt, 0x7e080010);

		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
		so_data (vtxbuf, 0);
	}
	nv50->state.vtxelt_nr = nv50->vtxelt_nr;

	so_ref (vtxfmt, &nv50->state.vtxfmt);
	so_ref (vtxbuf, &nv50->state.vtxbuf);
	so_ref (vtxattr, &nv50->state.vtxattr);
	so_ref (NULL, &vtxbuf);
	so_ref (NULL, &vtxfmt);
	so_ref (NULL, &vtxattr);
}
849
/* Callback emitting one vertex element's data into the FIFO. */
typedef void (*pfn_push)(struct nouveau_channel *, void *);

/* State for pushing vertices inline (immediate mode), set up by
 * emit_prepare().
 */
struct nv50_vbo_emitctx
{
	pfn_push push[16];	/* per-element emitter (size/count variant) */
	uint8_t *map[16];	/* mapped pointer to each element's first vertex */
	unsigned stride[16];	/* byte stride between vertices, per element */
	unsigned nr_ve;		/* number of elements pushed per vertex */
	unsigned vtx_dwords;	/* dwords emitted per vertex */
	unsigned vtx_max;	/* max vertices per VERTEX_DATA packet */

	float edgeflag;		/* last edge flag sent (starts at 0.5) */
	unsigned ve_edgeflag;	/* edge flag attribute index, >= 16 if unused */
};
864
865 static INLINE void
866 emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
867 {
868 unsigned i;
869
870 for (i = 0; i < emit->nr_ve; ++i) {
871 emit->push[i](chan, emit->map[i]);
872 emit->map[i] += emit->stride[i];
873 }
874 }
875
876 static INLINE void
877 emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
878 uint32_t vi)
879 {
880 unsigned i;
881
882 for (i = 0; i < emit->nr_ve; ++i)
883 emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
884 }
885
886 static INLINE boolean
887 nv50_map_vbufs(struct nv50_context *nv50)
888 {
889 int i;
890
891 for (i = 0; i < nv50->vtxbuf_nr; ++i) {
892 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
893 unsigned size = vb->stride * (vb->max_index + 1) + 16;
894
895 if (nouveau_bo(vb->buffer)->map)
896 continue;
897
898 size = vb->stride * (vb->max_index + 1) + 16;
899 size = MIN2(size, vb->buffer->size);
900 if (!size)
901 size = vb->buffer->size;
902
903 if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
904 0, size, NOUVEAU_BO_RD))
905 break;
906 }
907
908 if (i == nv50->vtxbuf_nr)
909 return TRUE;
910 for (; i >= 0; --i)
911 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
912 return FALSE;
913 }
914
/* Emit one 32-bit component as a single dword. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
	const uint32_t *src = data;

	OUT_RING(chan, src[0]);
}
922
/* Emit two 32-bit components as two dwords. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
	const uint32_t *src = data;

	OUT_RING(chan, src[0]);
	OUT_RING(chan, src[1]);
}
931
/* Emit three 32-bit components as three dwords. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
	const uint32_t *src = data;

	OUT_RING(chan, src[0]);
	OUT_RING(chan, src[1]);
	OUT_RING(chan, src[2]);
}
941
/* Emit four 32-bit components as four dwords. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
	const uint32_t *src = data;

	OUT_RING(chan, src[0]);
	OUT_RING(chan, src[1]);
	OUT_RING(chan, src[2]);
	OUT_RING(chan, src[3]);
}
952
/* Emit one 16-bit component, zero-extended into a dword. */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
	const uint16_t *src = data;

	OUT_RING(chan, src[0]);
}
960
/* Emit three 16-bit components: two packed in the first dword, the third
 * zero-extended in the second.
 */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
	const uint16_t *src = data;

	OUT_RING(chan, src[0] | (src[1] << 16));
	OUT_RING(chan, src[2]);
}
969
/* Emit one 8-bit component, zero-extended into a dword. */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
	const uint8_t *src = data;

	OUT_RING(chan, src[0]);
}
977
/* Emit three 8-bit components packed into the low 24 bits of one dword. */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
	const uint8_t *src = data;

	OUT_RING(chan, src[0] | (src[1] << 8) | (src[2] << 16));
}
985
/* Prepare the vertex push context: map all vertex buffers, then for each
 * element in the vbo_fifo mask record its mapped base pointer (offset to
 * 'start'), stride, and an emit callback matching its component size and
 * count.  Also computes dwords per vertex and the packet vertex limit
 * (forced to 1 when the edge flag must be tracked per vertex).
 * Returns FALSE on map failure or unsupported component size.
 */
static boolean
emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
	     unsigned start)
{
	unsigned i;

	if (nv50_map_vbufs(nv50) == FALSE)
		return FALSE;

	emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;

	/* 0.5 is a sentinel distinct from any real 0/1 flag value */
	emit->edgeflag = 0.5f;
	emit->nr_ve = 0;
	emit->vtx_dwords = 0;

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		struct pipe_vertex_element *ve;
		struct pipe_vertex_buffer *vb;
		unsigned n, size;
		const struct util_format_description *desc;

		ve = &nv50->vtxelt[i];
		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
		/* constant attributes and per-instance elements are not
		 * pushed per vertex
		 */
		if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
			continue;
		n = emit->nr_ve++;

		emit->stride[n] = vb->stride;
		emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
			vb->buffer_offset +
			(start * vb->stride + ve->src_offset);

		desc = util_format_description(ve->src_format);
		assert(desc);

		size = util_format_get_component_bits(
			ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);

		assert(ve->nr_components > 0 && ve->nr_components <= 4);

		/* It shouldn't be necessary to push the implicit 1s
		 * for case 3 and size 8 cases 1, 2, 3.
		 */
		switch (size) {
		default:
			NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
			return FALSE;
		case 32:
			switch (ve->nr_components) {
			case 1: emit->push[n] = emit_b32_1; break;
			case 2: emit->push[n] = emit_b32_2; break;
			case 3: emit->push[n] = emit_b32_3; break;
			case 4: emit->push[n] = emit_b32_4; break;
			}
			emit->vtx_dwords += ve->nr_components;
			break;
		case 16:
			/* 16-bit components pack two per dword, so even
			 * counts reuse the 32-bit emitters
			 */
			switch (ve->nr_components) {
			case 1: emit->push[n] = emit_b16_1; break;
			case 2: emit->push[n] = emit_b32_1; break;
			case 3: emit->push[n] = emit_b16_3; break;
			case 4: emit->push[n] = emit_b32_2; break;
			}
			emit->vtx_dwords += (ve->nr_components + 1) >> 1;
			break;
		case 8:
			/* 8-bit components always fit in a single dword */
			switch (ve->nr_components) {
			case 1: emit->push[n] = emit_b08_1; break;
			case 2: emit->push[n] = emit_b16_1; break;
			case 3: emit->push[n] = emit_b08_3; break;
			case 4: emit->push[n] = emit_b32_1; break;
			}
			emit->vtx_dwords += 1;
			break;
		}
	}

	/* 512-dword packet budget; one vertex at a time when the edge flag
	 * may change between vertices
	 */
	emit->vtx_max = 512 / emit->vtx_dwords;
	if (emit->ve_edgeflag < 16)
		emit->vtx_max = 1;

	return TRUE;
}
1069
/* If the vertex program reads the edge flag attribute, fetch its value for
 * the given vertex and, when it changed, update the hardware edge flag
 * state before emitting the vertex.
 *
 * NOTE(review): indexes map[]/stride[] with the attribute index while those
 * arrays are filled per *pushed* element -- verify they line up when some
 * elements are skipped in emit_prepare.
 */
static INLINE void
set_edgeflag(struct nouveau_channel *chan,
	     struct nouveau_grobj *tesla,
	     struct nv50_vbo_emitctx *emit, uint32_t index)
{
	unsigned i = emit->ve_edgeflag;

	if (i < 16) {
		/* raw float read through a cast -- assumes a 32-bit float
		 * edge flag array; TODO confirm
		 */
		float f = *((float *)(emit->map[i] + index * emit->stride[i]));

		if (emit->edgeflag != f) {
			emit->edgeflag = f;

			/* 0x15e4: presumably NV50TCL_EDGEFLAG_ENABLE */
			BEGIN_RING(chan, tesla, 0x15e4, 1);
			OUT_RING (chan, f ? 1 : 0);
		}
	}
}
1088
/* Push 'count' consecutive vertices starting at 'start' through the FIFO
 * as inline VERTEX_DATA (used when the hardware cannot fetch the arrays).
 */
static boolean
nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, start) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* vtx_max is 1 when the edge flag is tracked */
		set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx_next(chan, &emit);

		count -= nr;
	}

	return TRUE;
}
1114
/* Push vertices selected by 32-bit indices through the FIFO as inline
 * VERTEX_DATA.
 */
static boolean
nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* vtx_max is 1 when the edge flag is tracked, so *map is
		 * the chunk's (only) vertex
		 */
		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}

	return TRUE;
}
1140
/* Push vertices selected by 16-bit indices through the FIFO as inline
 * VERTEX_DATA.
 */
static boolean
nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* vtx_max is 1 when the edge flag is tracked, so *map is
		 * the chunk's (only) vertex
		 */
		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}

	return TRUE;
}
1166
/* Push vertices selected by 8-bit indices through the FIFO as inline
 * VERTEX_DATA.
 */
static boolean
nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
		dw = nr * emit.vtx_dwords;

		/* vtx_max is 1 when the edge flag is tracked, so *map is
		 * the chunk's (only) vertex
		 */
		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}

	return TRUE;
}