nvfx: so->RING_3D: vbo
[mesa.git] / src / gallium / drivers / nvfx / nvfx_vbo.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_state.h"
3 #include "util/u_inlines.h"
4 #include "util/u_format.h"
5
6 #include "nvfx_context.h"
7 #include "nvfx_state.h"
8 #include "nvfx_resource.h"
9
10 #include "nouveau/nouveau_channel.h"
11 #include "nouveau/nouveau_pushbuf.h"
12 #include "nouveau/nouveau_util.h"
13
14 static boolean
15 nvfx_force_swtnl(struct nvfx_context *nvfx)
16 {
17 static int force_swtnl = -1;
18 if(force_swtnl < 0)
19 force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", 0);
20 return force_swtnl;
21 }
22
23 static INLINE int
24 nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
25 {
26 switch (pipe) {
27 case PIPE_FORMAT_R32_FLOAT:
28 case PIPE_FORMAT_R32G32_FLOAT:
29 case PIPE_FORMAT_R32G32B32_FLOAT:
30 case PIPE_FORMAT_R32G32B32A32_FLOAT:
31 *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
32 break;
33 case PIPE_FORMAT_R8_UNORM:
34 case PIPE_FORMAT_R8G8_UNORM:
35 case PIPE_FORMAT_R8G8B8_UNORM:
36 case PIPE_FORMAT_R8G8B8A8_UNORM:
37 *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
38 break;
39 case PIPE_FORMAT_R16_SSCALED:
40 case PIPE_FORMAT_R16G16_SSCALED:
41 case PIPE_FORMAT_R16G16B16_SSCALED:
42 case PIPE_FORMAT_R16G16B16A16_SSCALED:
43 *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
44 break;
45 default:
46 NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
47 return 1;
48 }
49
50 switch (pipe) {
51 case PIPE_FORMAT_R8_UNORM:
52 case PIPE_FORMAT_R32_FLOAT:
53 case PIPE_FORMAT_R16_SSCALED:
54 *ncomp = 1;
55 break;
56 case PIPE_FORMAT_R8G8_UNORM:
57 case PIPE_FORMAT_R32G32_FLOAT:
58 case PIPE_FORMAT_R16G16_SSCALED:
59 *ncomp = 2;
60 break;
61 case PIPE_FORMAT_R8G8B8_UNORM:
62 case PIPE_FORMAT_R32G32B32_FLOAT:
63 case PIPE_FORMAT_R16G16B16_SSCALED:
64 *ncomp = 3;
65 break;
66 case PIPE_FORMAT_R8G8B8A8_UNORM:
67 case PIPE_FORMAT_R32G32B32A32_FLOAT:
68 case PIPE_FORMAT_R16G16B16A16_SSCALED:
69 *ncomp = 4;
70 break;
71 default:
72 NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
73 return 1;
74 }
75
76 return 0;
77 }
78
79 static boolean
80 nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
81 unsigned ib_size)
82 {
83 struct pipe_screen *pscreen = &nvfx->screen->base.base;
84 unsigned type;
85
86 if (!ib) {
87 nvfx->idxbuf = NULL;
88 nvfx->idxbuf_format = 0xdeadbeef;
89 return FALSE;
90 }
91
92 if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
93 return FALSE;
94
95 switch (ib_size) {
96 case 2:
97 type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
98 break;
99 case 4:
100 type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
101 break;
102 default:
103 return FALSE;
104 }
105
106 if (ib != nvfx->idxbuf ||
107 type != nvfx->idxbuf_format) {
108 nvfx->dirty |= NVFX_NEW_ARRAYS;
109 nvfx->idxbuf = ib;
110 nvfx->idxbuf_format = type;
111 }
112
113 return TRUE;
114 }
115
116 // type must be floating point
117 static inline void
118 nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
119 int attrib, struct pipe_vertex_element *ve,
120 struct pipe_vertex_buffer *vb, unsigned ncomp)
121 {
122 struct pipe_transfer *transfer;
123 struct nouveau_channel* chan = nvfx->screen->base.channel;
124 void *map;
125
126 map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
127 map += vb->buffer_offset + ve->src_offset;
128
129 float *v = map;
130
131 switch (ncomp) {
132 case 4:
133 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
134 OUT_RING(chan, fui(v[0]));
135 OUT_RING(chan, fui(v[1]));
136 OUT_RING(chan, fui(v[2]));
137 OUT_RING(chan, fui(v[3]));
138 break;
139 case 3:
140 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
141 OUT_RING(chan, fui(v[0]));
142 OUT_RING(chan, fui(v[1]));
143 OUT_RING(chan, fui(v[2]));
144 break;
145 case 2:
146 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
147 OUT_RING(chan, fui(v[0]));
148 OUT_RING(chan, fui(v[1]));
149 break;
150 case 1:
151 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
152 OUT_RING(chan, fui(v[0]));
153 break;
154 }
155
156 pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
157 }
158
159 void
160 nvfx_draw_arrays(struct pipe_context *pipe,
161 unsigned mode, unsigned start, unsigned count)
162 {
163 struct nvfx_context *nvfx = nvfx_context(pipe);
164 struct nvfx_screen *screen = nvfx->screen;
165 struct nouveau_channel *chan = screen->base.channel;
166 unsigned restart = 0;
167
168 nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
169 if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) {
170 nvfx_draw_elements_swtnl(pipe, NULL, 0,
171 mode, start, count);
172 return;
173 }
174
175 while (count) {
176 unsigned vc, nr;
177
178 nvfx_state_emit(nvfx);
179
180 unsigned avail = AVAIL_RING(chan);
181 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
182
183 vc = nouveau_vbuf_split(avail, 6, 256,
184 mode, start, count, &restart);
185 if (!vc) {
186 FIRE_RING(chan);
187 continue;
188 }
189
190 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
191 OUT_RING (chan, nvgl_primitive(mode));
192
193 nr = (vc & 0xff);
194 if (nr) {
195 OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
196 OUT_RING (chan, ((nr - 1) << 24) | start);
197 start += nr;
198 }
199
200 nr = vc >> 8;
201 while (nr) {
202 unsigned push = nr > 2047 ? 2047 : nr;
203
204 nr -= push;
205
206 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
207 while (push--) {
208 OUT_RING(chan, ((0x100 - 1) << 24) | start);
209 start += 0x100;
210 }
211 }
212
213 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
214 OUT_RING (chan, 0);
215
216 count -= vc;
217 start = restart;
218 }
219
220 pipe->flush(pipe, 0, NULL);
221 }
222
223 static INLINE void
224 nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
225 unsigned mode, unsigned start, unsigned count)
226 {
227 struct nvfx_screen *screen = nvfx->screen;
228 struct nouveau_channel *chan = screen->base.channel;
229
230 while (count) {
231 uint8_t *elts = (uint8_t *)ib + start;
232 unsigned vc, push, restart = 0;
233
234 nvfx_state_emit(nvfx);
235
236 unsigned avail = AVAIL_RING(chan);
237 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
238
239 vc = nouveau_vbuf_split(avail, 6, 2,
240 mode, start, count, &restart);
241 if (vc == 0) {
242 FIRE_RING(chan);
243 continue;
244 }
245 count -= vc;
246
247 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
248 OUT_RING (chan, nvgl_primitive(mode));
249
250 if (vc & 1) {
251 OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
252 OUT_RING (chan, elts[0]);
253 elts++; vc--;
254 }
255
256 while (vc) {
257 unsigned i;
258
259 push = MIN2(vc, 2047 * 2);
260
261 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
262 for (i = 0; i < push; i+=2)
263 OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
264
265 vc -= push;
266 elts += push;
267 }
268
269 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
270 OUT_RING (chan, 0);
271
272 start = restart;
273 }
274 }
275
276 static INLINE void
277 nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
278 unsigned mode, unsigned start, unsigned count)
279 {
280 struct nvfx_screen *screen = nvfx->screen;
281 struct nouveau_channel *chan = screen->base.channel;
282
283 while (count) {
284 uint16_t *elts = (uint16_t *)ib + start;
285 unsigned vc, push, restart = 0;
286
287 nvfx_state_emit(nvfx);
288
289 unsigned avail = AVAIL_RING(chan);
290 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
291
292 vc = nouveau_vbuf_split(avail, 6, 2,
293 mode, start, count, &restart);
294 if (vc == 0) {
295 FIRE_RING(chan);
296 continue;
297 }
298 count -= vc;
299
300 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
301 OUT_RING (chan, nvgl_primitive(mode));
302
303 if (vc & 1) {
304 OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
305 OUT_RING (chan, elts[0]);
306 elts++; vc--;
307 }
308
309 while (vc) {
310 unsigned i;
311
312 push = MIN2(vc, 2047 * 2);
313
314 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
315 for (i = 0; i < push; i+=2)
316 OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
317
318 vc -= push;
319 elts += push;
320 }
321
322 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
323 OUT_RING (chan, 0);
324
325 start = restart;
326 }
327 }
328
329 static INLINE void
330 nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
331 unsigned mode, unsigned start, unsigned count)
332 {
333 struct nvfx_screen *screen = nvfx->screen;
334 struct nouveau_channel *chan = screen->base.channel;
335
336 while (count) {
337 uint32_t *elts = (uint32_t *)ib + start;
338 unsigned vc, push, restart = 0;
339
340 nvfx_state_emit(nvfx);
341
342 unsigned avail = AVAIL_RING(chan);
343 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
344
345 vc = nouveau_vbuf_split(avail, 5, 1,
346 mode, start, count, &restart);
347 if (vc == 0) {
348 FIRE_RING(chan);
349 continue;
350 }
351 count -= vc;
352
353 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
354 OUT_RING (chan, nvgl_primitive(mode));
355
356 while (vc) {
357 push = MIN2(vc, 2047);
358
359 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
360 OUT_RINGp (chan, elts, push);
361
362 vc -= push;
363 elts += push;
364 }
365
366 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
367 OUT_RING (chan, 0);
368
369 start = restart;
370 }
371 }
372
373 static void
374 nvfx_draw_elements_inline(struct pipe_context *pipe,
375 struct pipe_resource *ib, unsigned ib_size,
376 unsigned mode, unsigned start, unsigned count)
377 {
378 struct nvfx_context *nvfx = nvfx_context(pipe);
379 struct pipe_transfer *transfer;
380 void *map;
381
382 map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
383 if (!ib) {
384 NOUVEAU_ERR("failed mapping ib\n");
385 return;
386 }
387
388 switch (ib_size) {
389 case 1:
390 nvfx_draw_elements_u08(nvfx, map, mode, start, count);
391 break;
392 case 2:
393 nvfx_draw_elements_u16(nvfx, map, mode, start, count);
394 break;
395 case 4:
396 nvfx_draw_elements_u32(nvfx, map, mode, start, count);
397 break;
398 default:
399 NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
400 break;
401 }
402
403 pipe_buffer_unmap(pipe, ib, transfer);
404 }
405
406 static void
407 nvfx_draw_elements_vbo(struct pipe_context *pipe,
408 unsigned mode, unsigned start, unsigned count)
409 {
410 struct nvfx_context *nvfx = nvfx_context(pipe);
411 struct nvfx_screen *screen = nvfx->screen;
412 struct nouveau_channel *chan = screen->base.channel;
413 unsigned restart = 0;
414
415 while (count) {
416 unsigned nr, vc;
417
418 nvfx_state_emit(nvfx);
419
420 unsigned avail = AVAIL_RING(chan);
421 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
422
423 vc = nouveau_vbuf_split(avail, 6, 256,
424 mode, start, count, &restart);
425 if (!vc) {
426 FIRE_RING(chan);
427 continue;
428 }
429
430 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
431 OUT_RING (chan, nvgl_primitive(mode));
432
433 nr = (vc & 0xff);
434 if (nr) {
435 OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
436 OUT_RING (chan, ((nr - 1) << 24) | start);
437 start += nr;
438 }
439
440 nr = vc >> 8;
441 while (nr) {
442 unsigned push = nr > 2047 ? 2047 : nr;
443
444 nr -= push;
445
446 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
447 while (push--) {
448 OUT_RING(chan, ((0x100 - 1) << 24) | start);
449 start += 0x100;
450 }
451 }
452
453 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
454 OUT_RING (chan, 0);
455
456 count -= vc;
457 start = restart;
458 }
459 }
460
461 void
462 nvfx_draw_elements(struct pipe_context *pipe,
463 struct pipe_resource *indexBuffer, unsigned indexSize,
464 unsigned mode, unsigned start, unsigned count)
465 {
466 struct nvfx_context *nvfx = nvfx_context(pipe);
467 boolean idxbuf;
468
469 idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
470 if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) {
471 nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize,
472 mode, start, count);
473 return;
474 }
475
476 if (idxbuf) {
477 nvfx_draw_elements_vbo(pipe, mode, start, count);
478 } else {
479 nvfx_draw_elements_inline(pipe, indexBuffer, indexSize,
480 mode, start, count);
481 }
482
483 pipe->flush(pipe, 0, NULL);
484 }
485
486 boolean
487 nvfx_vbo_validate(struct nvfx_context *nvfx)
488 {
489 struct nouveau_channel* chan = nvfx->screen->base.channel;
490 struct pipe_resource *ib = nvfx->idxbuf;
491 unsigned ib_format = nvfx->idxbuf_format;
492 int i;
493 int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
494 unsigned long vtxfmt[16];
495 unsigned vb_flags = nvfx->screen->vertex_buffer_flags | NOUVEAU_BO_RD;
496
497 if (!elements)
498 return TRUE;
499
500 nvfx->vbo_bo = 0;
501
502 MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
503 for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
504 struct pipe_vertex_element *ve;
505 struct pipe_vertex_buffer *vb;
506 unsigned type, ncomp;
507
508 ve = &nvfx->vtxelt->pipe[i];
509 vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
510
511 if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
512 MARK_UNDO(chan);
513 nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
514 return FALSE;
515 }
516
517 if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
518 nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
519 vtxfmt[i] = type;
520 } else {
521 vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
522 (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
523 nvfx->vbo_bo |= (1 << i);
524 }
525 }
526
527 for(; i < elements; ++i)
528 vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
529
530 OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
531 OUT_RINGp(chan, vtxfmt, elements);
532
533 if(nvfx->is_nv4x) {
534 unsigned i;
535 /* seems to be some kind of cache flushing */
536 for(i = 0; i < 3; ++i) {
537 OUT_RING(chan, RING_3D(0x1718, 1));
538 OUT_RING(chan, 0);
539 }
540 }
541
542 OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
543 for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
544 struct pipe_vertex_element *ve;
545 struct pipe_vertex_buffer *vb;
546
547 ve = &nvfx->vtxelt->pipe[i];
548 vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
549
550 if (!(nvfx->vbo_bo & (1 << i)))
551 OUT_RING(chan, 0);
552 else
553 {
554 struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
555 OUT_RELOC(chan, bo,
556 vb->buffer_offset + ve->src_offset,
557 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
558 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
559 }
560 }
561
562 for (; i < elements; i++)
563 OUT_RING(chan, 0);
564
565 OUT_RING(chan, RING_3D(0x1710, 1));
566 OUT_RING(chan, 0);
567
568 if (ib) {
569 struct nouveau_bo* bo = nvfx_resource(ib)->bo;
570
571 OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
572 OUT_RELOC(chan, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
573 OUT_RELOC(chan, bo, ib_format, vb_flags | NOUVEAU_BO_OR,
574 0, NV34TCL_IDXBUF_FORMAT_DMA1);
575 }
576
577 nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
578 return FALSE;
579 }
580
581 struct nvfx_state_entry nvfx_state_vbo = {
582 .validate = nvfx_vbo_validate,
583 .dirty = {
584 .pipe = NVFX_NEW_ARRAYS,
585 .hw = 0,
586 }
587 };
588
589 void
590 nvfx_vbo_relocate(struct nvfx_context *nvfx)
591 {
592 struct nouveau_channel* chan = nvfx->screen->base.channel;
593 unsigned vb_flags = nvfx->screen->vertex_buffer_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
594 int i;
595
596 MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
597 for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
598 if(nvfx->vbo_bo & (1 << i)) {
599 struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
600 struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
601 struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
602 OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
603 vb_flags, 0, 0);
604 OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
605 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
606 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
607 }
608 }
609
610 if(nvfx->idxbuf)
611 {
612 struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo;
613
614 OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
615 vb_flags, 0, 0);
616 OUT_RELOC(chan, bo, 0,
617 vb_flags | NOUVEAU_BO_LOW, 0, 0);
618 OUT_RELOC(chan, bo, nvfx->idxbuf_format,
619 vb_flags | NOUVEAU_BO_OR,
620 0, NV34TCL_IDXBUF_FORMAT_DMA1);
621 }
622 }