nv50: fix PSIZ and PRIMID mapping
[mesa.git] src/gallium/drivers/nv50/nv50_vbo.c
/*
 * Copyright 2008 Ben Skeggs
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"
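/* Per-element bookkeeping for instanced vertex arrays: the start address of
 * each array with a non-zero divisor is re-pointed at the data for the
 * current instance before every instance is drawn.
 */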
struct instance {
	struct nouveau_bo *bo;
	unsigned delta;
	unsigned stride;
	unsigned step;
	unsigned divisor;
};
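/* Record buffer, stride and starting byte offset for every vertex element
 * that uses an instance divisor, beginning at instance 'first'.
 */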
static void
instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
{
	int i;

	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
		struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
		struct pipe_vertex_buffer *vb;

		a[i].divisor = ve->instance_divisor;
		if (a[i].divisor) {
			vb = &nv50->vtxbuf[ve->vertex_buffer_index];

			a[i].bo = nv50_resource(vb->buffer)->bo;
			a[i].stride = vb->stride;
			a[i].step = first % a[i].divisor;
			a[i].delta = vb->buffer_offset + ve->src_offset +
				     (first * a[i].stride);
		}
	}
}
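/* Re-emit VERTEX_ARRAY_START for every instanced array, advancing its
 * offset by one stride each time 'divisor' instances have been drawn.
 */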
static void
instance_step(struct nv50_context *nv50, struct instance *a)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	int i;

	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
		if (!a[i].divisor)
			continue;

		BEGIN_RING(chan, tesla,
			   NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
		OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
		OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
		if (++a[i].step == a[i].divisor) {
			a[i].step = 0;
			a[i].delta += a[i].stride;
		}
	}
}
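/* Non-indexed instanced draw: the start instance is written through
 * CB_ADDR/CB_DATA into the AUX constant buffer, then one Begin/End pair is
 * emitted per instance, with (1 << 28) OR'd into the primitive mode for
 * every instance after the first.
 */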
void
nv50_draw_arrays_instanced(struct pipe_context *pipe,
			   unsigned mode, unsigned start, unsigned count,
			   unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct instance a[16];
	unsigned prim = nv50_prim(mode);

	instance_init(nv50, a, startInstance);
	if (!nv50_state_validate(nv50, 10 + 16*3))
		return;

	if (nv50->vbo_fifo) {
		nv50_push_elements_instanced(pipe, NULL, 0, 0, mode, start,
					     count, startInstance,
					     instanceCount);
		return;
	}

	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING  (chan, startInstance);
	while (instanceCount--) {
		if (AVAIL_RING(chan) < (7 + 16*3)) {
			FIRE_RING(chan);
			if (!nv50_state_validate(nv50, 7 + 16*3)) {
				assert(0);
				return;
			}
		}
		instance_step(nv50, a);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING  (chan, prim);
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING  (chan, start);
		OUT_RING  (chan, count);
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING  (chan, 0);

		prim |= (1 << 28);
	}
}
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
		 unsigned count)
{
	nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
}
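/* Helpers for emitting index data inline through the pushbuf; used when the
 * index buffer is not GPU-accessible or holds 8-bit indices, which the
 * hardware cannot fetch directly.
 */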
struct inline_ctx {
	struct nv50_context *nv50;
	void *map;
};
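/* Emit indices inline: 8- and 16-bit indices are packed two per 32-bit word
 * (an odd leading index is sent as a single U32 element first), 32-bit
 * indices are copied through as-is.
 */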
static void
inline_elt08(void *priv, unsigned start, unsigned count)
{
	struct inline_ctx *ctx = priv;
	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	uint8_t *map = (uint8_t *)ctx->map + start;

	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING  (chan, map[0]);
		map++;
		count &= ~1;
	}

	count >>= 1;
	if (!count)
		return;

	BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
	while (count--) {
		OUT_RING(chan, (map[1] << 16) | map[0]);
		map += 2;
	}
}

static void
inline_elt16(void *priv, unsigned start, unsigned count)
{
	struct inline_ctx *ctx = priv;
	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	uint16_t *map = (uint16_t *)ctx->map + start;

	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING  (chan, map[0]);
		count &= ~1;
		map++;
	}

	count >>= 1;
	if (!count)
		return;

	BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
	while (count--) {
		OUT_RING(chan, (map[1] << 16) | map[0]);
		map += 2;
	}
}

static void
inline_elt32(void *priv, unsigned start, unsigned count)
{
	struct inline_ctx *ctx = priv;
	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;

	BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
	OUT_RINGp (chan, (uint32_t *)ctx->map + start, count);
}

static void
inline_edgeflag(void *priv, boolean enabled)
{
	struct inline_ctx *ctx = priv;
	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;

	BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
	OUT_RING  (chan, enabled ? 1 : 0);
}
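/* Indexed draw with inline index upload: the index buffer is mapped on the
 * CPU and the primitive is split with u_split_prim so that each chunk, plus
 * its per-chunk overhead, fits into the space left in the pushbuf.
 */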
static void
nv50_draw_elements_inline(struct pipe_context *pipe,
			  struct pipe_resource *indexBuffer, unsigned indexSize,
			  unsigned mode, unsigned start, unsigned count,
			  unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct pipe_transfer *transfer;
	struct instance a[16];
	struct inline_ctx ctx;
	struct u_split_prim s;
	boolean nzi = FALSE;
	unsigned overhead;

	overhead = 16*3; /* potential instance adjustments */
	overhead += 4; /* Begin()/End() */
	overhead += 4; /* potential edgeflag disable/reenable */
	overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */

	s.priv = &ctx;
	if (indexSize == 1)
		s.emit = inline_elt08;
	else
	if (indexSize == 2)
		s.emit = inline_elt16;
	else
		s.emit = inline_elt32;
	s.edge = inline_edgeflag;

	ctx.nv50 = nv50;
	ctx.map = pipe_buffer_map(pipe, indexBuffer, PIPE_TRANSFER_READ, &transfer);
	assert(ctx.map);
	if (!ctx.map)
		return;

	instance_init(nv50, a, startInstance);
	if (!nv50_state_validate(nv50, overhead + 6 + 3))
		return;

	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING  (chan, startInstance);
	while (instanceCount--) {
		unsigned max_verts;
		boolean done;

		u_split_prim_init(&s, mode, start, count);
		do {
			if (AVAIL_RING(chan) < (overhead + 6)) {
				FIRE_RING(chan);
				if (!nv50_state_validate(nv50, (overhead + 6))) {
					assert(0);
					return;
				}
			}

			max_verts = AVAIL_RING(chan) - overhead;
			if (max_verts > 2047)
				max_verts = 2047;
			if (indexSize != 4)
				max_verts <<= 1;
			instance_step(nv50, a);

			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
			OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
			done = u_split_prim_next(&s, max_verts);
			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
			OUT_RING  (chan, 0);
		} while (!done);

		nzi = TRUE;
	}

	pipe_buffer_unmap(pipe, indexBuffer, transfer);
}
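/* Indexed instanced draw: sets the index bias, then either falls back to the
 * inline/push paths or has the U16/U32 index data pulled straight from the
 * index buffer via nouveau_pushbuf_submit instead of copying it on the CPU.
 */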
void
nv50_draw_elements_instanced(struct pipe_context *pipe,
			     struct pipe_resource *indexBuffer,
			     unsigned indexSize, int indexBias,
			     unsigned mode, unsigned start, unsigned count,
			     unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct instance a[16];
	unsigned prim = nv50_prim(mode);

	instance_init(nv50, a, startInstance);
	if (!nv50_state_validate(nv50, 13 + 16*3))
		return;

	if (nv50->vbo_fifo) {
		nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
					     indexBias, mode, start, count,
					     startInstance, instanceCount);
		return;
	}

	/* indices are uint32 internally, so large indexBias means negative */
	BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_BASE, 1);
	OUT_RING  (chan, indexBias);

	if (!nv50_resource_mapped_by_gpu(indexBuffer) || indexSize == 1) {
		nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
					  mode, start, count, startInstance,
					  instanceCount);
		return;
	}

	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING  (chan, startInstance);
	while (instanceCount--) {
		if (AVAIL_RING(chan) < (7 + 16*3)) {
			FIRE_RING(chan);
			if (!nv50_state_validate(nv50, 10 + 16*3)) {
				assert(0);
				return;
			}
		}
		instance_step(nv50, a);

		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING  (chan, prim);
		if (indexSize == 4) {
			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
			OUT_RING  (chan, count);
			nouveau_pushbuf_submit(chan,
					       nv50_resource(indexBuffer)->bo,
					       start << 2, count << 2);
		} else
		if (indexSize == 2) {
			unsigned vb_start = (start & ~1);
			unsigned vb_end = (start + count + 1) & ~1;
			unsigned dwords = (vb_end - vb_start) >> 1;

			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
			OUT_RING  (chan, ((start & 1) << 31) | count);
			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
			OUT_RING  (chan, dwords);
			nouveau_pushbuf_submit(chan,
					       nv50_resource(indexBuffer)->bo,
					       vb_start << 1, dwords << 2);
			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
			OUT_RING  (chan, 0);
		}
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING  (chan, 0);

		prim |= (1 << 28);
	}
}
void
nv50_draw_elements(struct pipe_context *pipe,
		   struct pipe_resource *indexBuffer,
		   unsigned indexSize, int indexBias,
		   unsigned mode, unsigned start, unsigned count)
{
	nv50_draw_elements_instanced(pipe, indexBuffer, indexSize, indexBias,
				     mode, start, count, 0, 1);
}
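/* Constant (stride 0) vertex elements are not set up as arrays; the value is
 * read on the CPU and emitted as an immediate VTX_ATTR_nF method into a
 * state object instead, with edge flags handled specially.
 */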
static INLINE boolean
nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
		       struct nouveau_stateobj **pso,
		       struct pipe_vertex_element *ve,
		       struct pipe_vertex_buffer *vb)
{
	struct nouveau_stateobj *so;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
	float v[4];
	int ret;
	unsigned nr_components = util_format_get_nr_components(ve->src_format);

	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
	if (ret)
		return FALSE;

	util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
			    (vb->buffer_offset + ve->src_offset), 0,
			    0, 0, 1, 1);
	so = *pso;
	if (!so)
		*pso = so = so_new(nv50->vtxelt->num_elements,
				   nv50->vtxelt->num_elements * 4, 0);

	switch (nr_components) {
	case 4:
		so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		so_data  (so, fui(v[3]));
		break;
	case 3:
		so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		break;
	case 2:
		so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		break;
	case 1:
		if (attrib == nv50->vertprog->vp.edgeflag) {
			so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
			so_data  (so, v[0] ? 1 : 0);
		}
		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
		so_data  (so, fui(v[0]));
		break;
	default:
		nouveau_bo_unmap(bo);
		return FALSE;
	}

	nouveau_bo_unmap(bo);
	return TRUE;
}
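/* Translate the Gallium vertex element formats into hardware format words
 * once, at vertex-element state object creation time.
 */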
void
nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
{
	unsigned i;

	for (i = 0; i < cso->num_elements; ++i)
		cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx;
}
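/* Build the vertex format and vertex buffer state objects. Falls back to
 * pushing vertices through the FIFO (vbo_fifo) when force_push is set, the
 * vertex program uses an edge flag input, or a strided buffer is not
 * GPU-accessible.
 */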
struct nouveau_stateobj *
nv50_vbo_validate(struct nv50_context *nv50)
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
	unsigned i, n_ve;

	/* don't validate if Gallium took away our buffers */
	if (nv50->vtxbuf_nr == 0)
		return NULL;

	nv50->vbo_fifo = 0;
	if (nv50->screen->force_push ||
	    nv50->vertprog->vp.edgeflag < 16)
		nv50->vbo_fifo = 0xffff;

	for (i = 0; i < nv50->vtxbuf_nr; i++) {
		if (nv50->vtxbuf[i].stride &&
		    !nv50_resource_mapped_by_gpu(nv50->vtxbuf[i].buffer))
			nv50->vbo_fifo = 0xffff;
	}

	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);

	vtxattr = NULL;
	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
	vtxfmt = so_new(1, n_ve, 0);
	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);

	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
		struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
		struct pipe_vertex_buffer *vb =
			&nv50->vtxbuf[ve->vertex_buffer_index];
		struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
		uint32_t hw = nv50->vtxelt->hw[i];

		if (!vb->stride &&
		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
			so_data(vtxfmt, hw | (1 << 4));

			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);

			nv50->vbo_fifo &= ~(1 << i);
			continue;
		}

		if (nv50->vbo_fifo) {
			so_data  (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);
			continue;
		}

		so_data(vtxfmt, hw | i);

		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
		so_data  (vtxbuf, 0x20000000 |
			  (ve->instance_divisor ? 0 : vb->stride));
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);

		/* vertex array limits */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
		so_reloc (vtxbuf, bo, vb->buffer->width0 - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer->width0 - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_LOW, 0, 0);
	}
	for (; i < n_ve; ++i) {
		so_data  (vtxfmt, 0x7e080010);

		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
		so_data  (vtxbuf, 0);
	}
	nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;

	so_ref (vtxbuf, &nv50->state.vtxbuf);
	so_ref (vtxattr, &nv50->state.vtxattr);
	so_ref (NULL, &vtxbuf);
	so_ref (NULL, &vtxattr);
	return vtxfmt;
}