Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / drivers / nv50 / nv50_vbo.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "util/u_inlines.h"
26 #include "util/u_format.h"
27 #include "util/u_split_prim.h"
28
29 #include "nv50_context.h"
30 #include "nv50_resource.h"
31
/* Per-vertex-element instancing state, used to step instanced arrays
 * on the CPU between per-instance draws.
 */
struct instance {
	struct nouveau_bo *bo;	/* backing buffer object of the vertex buffer */
	unsigned delta;		/* byte offset of the current instance's data */
	unsigned stride;	/* vertex buffer stride in bytes */
	unsigned step;		/* instances emitted since delta last advanced */
	unsigned divisor;	/* instance divisor; 0 = attribute not instanced */
};
39
40 static void
41 instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
42 {
43 int i;
44
45 for (i = 0; i < nv50->vtxelt->num_elements; i++) {
46 struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
47 struct pipe_vertex_buffer *vb;
48
49 a[i].divisor = ve->instance_divisor;
50 if (a[i].divisor) {
51 vb = &nv50->vtxbuf[ve->vertex_buffer_index];
52
53 a[i].bo = nv50_resource(vb->buffer)->bo;
54 a[i].stride = vb->stride;
55 a[i].step = first % a[i].divisor;
56 a[i].delta = vb->buffer_offset + ve->src_offset +
57 (first * a[i].stride);
58 }
59 }
60 }
61
/* Re-emit the vertex array start address of every instanced element for
 * the current instance, then advance the per-instance counters.
 *
 * The high/low relocation pair must be emitted in exactly this order to
 * match the NV50TCL_VERTEX_ARRAY_START_HIGH(i) method pair.
 */
static void
instance_step(struct nv50_context *nv50, struct instance *a)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	int i;

	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
		if (!a[i].divisor)
			continue;

		BEGIN_RING(chan, tesla,
			   NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
		OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
		OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
		/* advance to the next vertex every 'divisor' instances */
		if (++a[i].step == a[i].divisor) {
			a[i].step = 0;
			a[i].delta += a[i].stride;
		}
	}
}
85
/* Draw non-indexed primitives, looping over instances on the CPU.
 *
 * Falls back to the immediate-mode push path when nv50->vbo_fifo is
 * set; otherwise each instance gets its own VERTEX_BEGIN/END pair with
 * instanced array addresses re-emitted in between by instance_step().
 */
static void
nv50_draw_arrays_instanced(struct pipe_context *pipe,
			   unsigned mode, unsigned start, unsigned count,
			   unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct instance a[16];
	unsigned prim = nv50_prim(mode);

	instance_init(nv50, a, startInstance);
	if (!nv50_state_validate(nv50, 10 + 16*3))
		return;

	if (nv50->vbo_fifo) {
		nv50_push_elements_instanced(pipe, NULL, 0, 0, mode, start,
					     count, startInstance,
					     instanceCount);
		return;
	}

	/* write startInstance into the aux constant buffer (offset 24 <<
	 * 8 in CB_ADDR) so shaders can see the instance base */
	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING (chan, startInstance);
	while (instanceCount--) {
		/* reserve space for one full instance; revalidate state
		 * after a flush since it may have been kicked off */
		if (AVAIL_RING(chan) < (7 + 16*3)) {
			FIRE_RING(chan);
			if (!nv50_state_validate(nv50, 7 + 16*3)) {
				assert(0);
				return;
			}
		}
		instance_step(nv50, a);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING (chan, prim);
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING (chan, start);
		OUT_RING (chan, count);
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING (chan, 0);

		/* NOTE(review): bit 28 presumably marks subsequent begins
		 * as a new instance of the same draw — confirm against the
		 * NV50TCL_VERTEX_BEGIN method documentation */
		prim |= (1 << 28);
	}
}
132
/* Closure handed to the util_split_prim callbacks: the context plus the
 * CPU mapping of the index buffer whose indices are pushed inline.
 */
struct inline_ctx {
	struct nv50_context *nv50;
	void *map;	/* mapped index buffer contents */
};
137
138 static void
139 inline_elt08(void *priv, unsigned start, unsigned count)
140 {
141 struct inline_ctx *ctx = priv;
142 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
143 struct nouveau_channel *chan = tesla->channel;
144 uint8_t *map = (uint8_t *)ctx->map + start;
145
146 if (count & 1) {
147 BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
148 OUT_RING (chan, map[0]);
149 map++;
150 count &= ~1;
151 }
152
153 count >>= 1;
154 if (!count)
155 return;
156
157 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
158 while (count--) {
159 OUT_RING(chan, (map[1] << 16) | map[0]);
160 map += 2;
161 }
162 }
163
164 static void
165 inline_elt16(void *priv, unsigned start, unsigned count)
166 {
167 struct inline_ctx *ctx = priv;
168 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
169 struct nouveau_channel *chan = tesla->channel;
170 uint16_t *map = (uint16_t *)ctx->map + start;
171
172 if (count & 1) {
173 BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
174 OUT_RING (chan, map[0]);
175 count &= ~1;
176 map++;
177 }
178
179 count >>= 1;
180 if (!count)
181 return;
182
183 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
184 while (count--) {
185 OUT_RING(chan, (map[1] << 16) | map[0]);
186 map += 2;
187 }
188 }
189
190 static void
191 inline_elt32(void *priv, unsigned start, unsigned count)
192 {
193 struct inline_ctx *ctx = priv;
194 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
195 struct nouveau_channel *chan = tesla->channel;
196
197 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
198 OUT_RINGp (chan, (uint32_t *)ctx->map + start, count);
199 }
200
201 static void
202 inline_edgeflag(void *priv, boolean enabled)
203 {
204 struct inline_ctx *ctx = priv;
205 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
206 struct nouveau_channel *chan = tesla->channel;
207
208 BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
209 OUT_RING (chan, enabled ? 1 : 0);
210 }
211
212 static void
213 nv50_draw_elements_inline(struct pipe_context *pipe,
214 struct pipe_resource *indexBuffer, unsigned indexSize,
215 unsigned mode, unsigned start, unsigned count,
216 unsigned startInstance, unsigned instanceCount)
217 {
218 struct nv50_context *nv50 = nv50_context(pipe);
219 struct nouveau_channel *chan = nv50->screen->tesla->channel;
220 struct nouveau_grobj *tesla = nv50->screen->tesla;
221 struct pipe_transfer *transfer;
222 struct instance a[16];
223 struct inline_ctx ctx;
224 struct util_split_prim s;
225 boolean nzi = FALSE;
226 unsigned overhead;
227
228 overhead = 16*3; /* potential instance adjustments */
229 overhead += 4; /* Begin()/End() */
230 overhead += 4; /* potential edgeflag disable/reenable */
231 overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
232
233 s.priv = &ctx;
234 if (indexSize == 1)
235 s.emit = inline_elt08;
236 else
237 if (indexSize == 2)
238 s.emit = inline_elt16;
239 else
240 s.emit = inline_elt32;
241 s.edge = inline_edgeflag;
242
243 ctx.nv50 = nv50;
244 ctx.map = pipe_buffer_map(pipe, indexBuffer, PIPE_TRANSFER_READ, &transfer);
245 assert(ctx.map);
246 if (!ctx.map)
247 return;
248
249 instance_init(nv50, a, startInstance);
250 if (!nv50_state_validate(nv50, overhead + 6 + 3))
251 return;
252
253 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
254 OUT_RING (chan, NV50_CB_AUX | (24 << 8));
255 OUT_RING (chan, startInstance);
256 while (instanceCount--) {
257 unsigned max_verts;
258 boolean done;
259
260 util_split_prim_init(&s, mode, start, count);
261 do {
262 if (AVAIL_RING(chan) < (overhead + 6)) {
263 FIRE_RING(chan);
264 if (!nv50_state_validate(nv50, (overhead + 6))) {
265 assert(0);
266 return;
267 }
268 }
269
270 max_verts = AVAIL_RING(chan) - overhead;
271 if (max_verts > 2047)
272 max_verts = 2047;
273 if (indexSize != 4)
274 max_verts <<= 1;
275 instance_step(nv50, a);
276
277 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
278 OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
279 done = util_split_prim_next(&s, max_verts);
280 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
281 OUT_RING (chan, 0);
282 } while (!done);
283
284 nzi = TRUE;
285 }
286
287 pipe_buffer_unmap(pipe, indexBuffer, transfer);
288 }
289
/* Draw indexed primitives, looping over instances on the CPU.
 *
 * Chooses between three index paths: the immediate push path when
 * vbo_fifo is set, inline index upload when the index buffer is not
 * GPU-accessible or indices are 8 bit, and otherwise direct GPU fetch
 * of the index buffer via pushbuf data submission.
 */
static void
nv50_draw_elements_instanced(struct pipe_context *pipe,
			     struct pipe_resource *indexBuffer,
			     unsigned indexSize, int indexBias,
			     unsigned mode, unsigned start, unsigned count,
			     unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct instance a[16];
	unsigned prim = nv50_prim(mode);

	instance_init(nv50, a, startInstance);
	if (!nv50_state_validate(nv50, 13 + 16*3))
		return;

	if (nv50->vbo_fifo) {
		nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
					     indexBias, mode, start, count,
					     startInstance, instanceCount);
		return;
	}

	/* indices are uint32 internally, so large indexBias means negative */
	BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_BASE, 1);
	OUT_RING (chan, indexBias);

	/* the hardware cannot fetch 8-bit indices directly */
	if (!nv50_resource_mapped_by_gpu(indexBuffer) || indexSize == 1) {
		nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
					  mode, start, count, startInstance,
					  instanceCount);
		return;
	}

	/* write startInstance into the aux constant buffer */
	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING (chan, startInstance);
	while (instanceCount--) {
		if (AVAIL_RING(chan) < (7 + 16*3)) {
			FIRE_RING(chan);
			/* NOTE(review): validates 10 + 16*3 here while the
			 * availability check above reserves 7 + 16*3 —
			 * confirm which reserve size is intended */
			if (!nv50_state_validate(nv50, 10 + 16*3)) {
				assert(0);
				return;
			}
		}
		instance_step(nv50, a);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING (chan, prim);
		if (indexSize == 4) {
			/* submit the index range straight from the bo */
			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
			OUT_RING (chan, count);
			nouveau_pushbuf_submit(chan,
					       nv50_resource(indexBuffer)->bo,
					       start << 2, count << 2);
		} else
		if (indexSize == 2) {
			/* 16-bit indices are fetched two per dword, so align
			 * the submitted range down/up to even indices and
			 * tell the hardware whether to skip the first one */
			unsigned vb_start = (start & ~1);
			unsigned vb_end = (start + count + 1) & ~1;
			unsigned dwords = (vb_end - vb_start) >> 1;

			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
			OUT_RING (chan, ((start & 1) << 31) | count);
			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
			OUT_RING (chan, dwords);
			nouveau_pushbuf_submit(chan,
					       nv50_resource(indexBuffer)->bo,
					       vb_start << 1, dwords << 2);
			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
			OUT_RING (chan, 0);
		}
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING (chan, 0);

		/* NOTE(review): bit 28 presumably marks subsequent begins as
		 * a new instance — confirm against the method docs */
		prim |= (1 << 28);
	}
}
368
369 void
370 nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
371 {
372 struct nv50_context *nv50 = nv50_context(pipe);
373
374 if (info->indexed && nv50->idxbuf.buffer) {
375 unsigned offset;
376
377 assert(nv50->idxbuf.offset % nv50->idxbuf.index_size == 0);
378 offset = nv50->idxbuf.offset / nv50->idxbuf.index_size;
379
380 nv50_draw_elements_instanced(pipe,
381 nv50->idxbuf.buffer,
382 nv50->idxbuf.index_size,
383 info->index_bias,
384 info->mode,
385 info->start + offset,
386 info->count,
387 info->start_instance,
388 info->instance_count);
389 }
390 else {
391 nv50_draw_arrays_instanced(pipe,
392 info->mode,
393 info->start,
394 info->count,
395 info->start_instance,
396 info->instance_count);
397 }
398 }
399
/* Replace a constant (stride 0) vertex array with immediate vertex
 * attribute methods.
 *
 * Maps the buffer, reads the single vertex back as floats, and appends
 * the matching VTX_ATTR_*F methods to *pso (allocated on first use).
 * Returns FALSE if the buffer cannot be mapped or the component count
 * is unsupported, so the caller can fall back to a regular array.
 */
static INLINE boolean
nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
		       struct nouveau_stateobj **pso,
		       struct pipe_vertex_element *ve,
		       struct pipe_vertex_buffer *vb)

{
	struct nouveau_stateobj *so;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
	float v[4];
	int ret;
	unsigned nr_components = util_format_get_nr_components(ve->src_format);

	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
	if (ret)
		return FALSE;

	/* decode the one vertex at the element's offset into v[0..3] */
	util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
			    (vb->buffer_offset + ve->src_offset), 0,
			    0, 0, 1, 1);
	so = *pso;
	if (!so)
		*pso = so = so_new(nv50->vtxelt->num_elements,
				   nv50->vtxelt->num_elements * 4, 0);

	switch (nr_components) {
	case 4:
		so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		so_data  (so, fui(v[3]));
		break;
	case 3:
		so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		break;
	case 2:
		so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		break;
	case 1:
		/* a constant edge flag attribute also programs the global
		 * edge flag state (in addition to the 1F attribute below) */
		if (attrib == nv50->vertprog->vp.edgeflag) {
			so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
			so_data  (so, v[0] ? 1 : 0);
		}
		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
		so_data  (so, fui(v[0]));
		break;
	default:
		nouveau_bo_unmap(bo);
		return FALSE;
	}

	nouveau_bo_unmap(bo);
	return TRUE;
}
461
462 void
463 nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
464 {
465 unsigned i;
466
467 for (i = 0; i < cso->num_elements; ++i)
468 cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx;
469 }
470
/* Build the vertex array state objects for the currently bound vertex
 * elements and buffers.
 *
 * Returns the vertex format stateobj (caller owns the reference); the
 * vertex buffer and static-attribute stateobjs are stored into
 * nv50->state.  Also decides, per element, whether vertex data must be
 * pushed through the FIFO (nv50->vbo_fifo bitmask) instead of being
 * fetched by the GPU.
 */
struct nouveau_stateobj *
nv50_vbo_validate(struct nv50_context *nv50)
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
	unsigned i, n_ve;

	/* don't validate if Gallium took away our buffers */
	if (nv50->vtxbuf_nr == 0)
		return NULL;

	/* force the push path when requested or when the vertex program
	 * uses an edge flag input (NOTE(review): presumably edge flags
	 * cannot be fetched as arrays — confirm) */
	nv50->vbo_fifo = 0;
	if (nv50->screen->force_push ||
	    nv50->vertprog->vp.edgeflag < 16)
		nv50->vbo_fifo = 0xffff;

	/* buffers not mapped by the GPU must be pushed as well */
	for (i = 0; i < nv50->vtxbuf_nr; i++) {
		if (nv50->vtxbuf[i].stride &&
		    !nv50_resource_mapped_by_gpu(nv50->vtxbuf[i].buffer))
			nv50->vbo_fifo = 0xffff;
	}

	/* also cover elements left over from the previous validation so
	 * they get disabled below */
	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);

	vtxattr = NULL;
	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
	vtxfmt = so_new(1, n_ve, 0);
	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);

	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
		struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
		struct pipe_vertex_buffer *vb =
			&nv50->vtxbuf[ve->vertex_buffer_index];
		struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
		uint32_t hw = nv50->vtxelt->hw[i];

		/* stride-0 arrays become constant immediate attributes */
		if (!vb->stride &&
		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
			so_data(vtxfmt, hw | (1 << 4));

			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);

			nv50->vbo_fifo &= ~(1 << i);
			continue;
		}

		/* pushed elements: disable the hardware array fetch */
		if (nv50->vbo_fifo) {
			so_data (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);
			continue;
		}

		so_data(vtxfmt, hw | i);

		/* instanced arrays get stride 0 here; instance_step()
		 * adjusts the start address per instance instead */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
		so_data  (vtxbuf, 0x20000000 |
			  (ve->instance_divisor ? 0 : vb->stride));
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);

		/* vertex array limits */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
		so_reloc (vtxbuf, bo, vb->buffer->width0 - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer->width0 - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_LOW, 0, 0);
	}
	/* disable any elements enabled by a previous validation
	 * (NOTE(review): 0x7e080010 appears to be the "disabled" attrib
	 * format word — confirm against the class documentation) */
	for (; i < n_ve; ++i) {
		so_data  (vtxfmt, 0x7e080010);

		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
		so_data  (vtxbuf, 0);
	}
	nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;

	so_ref (vtxbuf, &nv50->state.vtxbuf);
	so_ref (vtxattr, &nv50->state.vtxattr);
	so_ref (NULL, &vtxbuf);
	so_ref (NULL, &vtxattr);
	return vtxfmt;
}
562
563