8b9b5d0203cda4ebd52253b05a3db8f748f6007d
[mesa.git] / src / gallium / drivers / nvfx / nvfx_vbo.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_state.h"
3 #include "util/u_inlines.h"
4 #include "util/u_format.h"
5
6 #include "nvfx_context.h"
7 #include "nvfx_state.h"
8 #include "nvfx_resource.h"
9
10 #include "nouveau/nouveau_channel.h"
11 #include "nouveau/nouveau_pushbuf.h"
12 #include "nouveau/nouveau_util.h"
13
14 static INLINE int
15 nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
16 {
17 switch (pipe) {
18 case PIPE_FORMAT_R32_FLOAT:
19 case PIPE_FORMAT_R32G32_FLOAT:
20 case PIPE_FORMAT_R32G32B32_FLOAT:
21 case PIPE_FORMAT_R32G32B32A32_FLOAT:
22 *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
23 break;
24 case PIPE_FORMAT_R16_FLOAT:
25 case PIPE_FORMAT_R16G16_FLOAT:
26 case PIPE_FORMAT_R16G16B16_FLOAT:
27 case PIPE_FORMAT_R16G16B16A16_FLOAT:
28 *fmt = NV34TCL_VTXFMT_TYPE_HALF;
29 break;
30 case PIPE_FORMAT_R8_UNORM:
31 case PIPE_FORMAT_R8G8_UNORM:
32 case PIPE_FORMAT_R8G8B8_UNORM:
33 case PIPE_FORMAT_R8G8B8A8_UNORM:
34 *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
35 break;
36 case PIPE_FORMAT_R16_SSCALED:
37 case PIPE_FORMAT_R16G16_SSCALED:
38 case PIPE_FORMAT_R16G16B16_SSCALED:
39 case PIPE_FORMAT_R16G16B16A16_SSCALED:
40 *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
41 break;
42 default:
43 NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
44 return 1;
45 }
46
47 switch (pipe) {
48 case PIPE_FORMAT_R8_UNORM:
49 case PIPE_FORMAT_R32_FLOAT:
50 case PIPE_FORMAT_R16_FLOAT:
51 case PIPE_FORMAT_R16_SSCALED:
52 *ncomp = 1;
53 break;
54 case PIPE_FORMAT_R8G8_UNORM:
55 case PIPE_FORMAT_R32G32_FLOAT:
56 case PIPE_FORMAT_R16G16_FLOAT:
57 case PIPE_FORMAT_R16G16_SSCALED:
58 *ncomp = 2;
59 break;
60 case PIPE_FORMAT_R8G8B8_UNORM:
61 case PIPE_FORMAT_R32G32B32_FLOAT:
62 case PIPE_FORMAT_R16G16B16_FLOAT:
63 case PIPE_FORMAT_R16G16B16_SSCALED:
64 *ncomp = 3;
65 break;
66 case PIPE_FORMAT_R8G8B8A8_UNORM:
67 case PIPE_FORMAT_R32G32B32A32_FLOAT:
68 case PIPE_FORMAT_R16G16B16A16_FLOAT:
69 case PIPE_FORMAT_R16G16B16A16_SSCALED:
70 *ncomp = 4;
71 break;
72 default:
73 NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
74 return 1;
75 }
76
77 return 0;
78 }
79
80 static boolean
81 nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
82 unsigned ib_size)
83 {
84 struct pipe_screen *pscreen = &nvfx->screen->base.base;
85 unsigned type;
86
87 if (!ib) {
88 nvfx->idxbuf = NULL;
89 nvfx->idxbuf_format = 0xdeadbeef;
90 return FALSE;
91 }
92
93 if (nvfx->screen->eng3d->grclass != NV40TCL || ib_size == 1)
94 return FALSE;
95
96 switch (ib_size) {
97 case 2:
98 type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
99 break;
100 case 4:
101 type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
102 break;
103 default:
104 return FALSE;
105 }
106
107 if (ib != nvfx->idxbuf ||
108 type != nvfx->idxbuf_format) {
109 nvfx->dirty |= NVFX_NEW_ARRAYS;
110 nvfx->idxbuf = ib;
111 nvfx->idxbuf_format = type;
112 }
113
114 return TRUE;
115 }
116
117 // type must be floating point
118 static inline void
119 nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
120 int attrib, struct pipe_vertex_element *ve,
121 struct pipe_vertex_buffer *vb, unsigned ncomp)
122 {
123 struct pipe_transfer *transfer;
124 struct nouveau_channel* chan = nvfx->screen->base.channel;
125 void *map;
126
127 map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
128 map += vb->buffer_offset + ve->src_offset;
129
130 float *v = map;
131
132 switch (ncomp) {
133 case 4:
134 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
135 OUT_RING(chan, fui(v[0]));
136 OUT_RING(chan, fui(v[1]));
137 OUT_RING(chan, fui(v[2]));
138 OUT_RING(chan, fui(v[3]));
139 break;
140 case 3:
141 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
142 OUT_RING(chan, fui(v[0]));
143 OUT_RING(chan, fui(v[1]));
144 OUT_RING(chan, fui(v[2]));
145 break;
146 case 2:
147 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
148 OUT_RING(chan, fui(v[0]));
149 OUT_RING(chan, fui(v[1]));
150 break;
151 case 1:
152 OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
153 OUT_RING(chan, fui(v[0]));
154 break;
155 }
156
157 pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
158 }
159
160 void
161 nvfx_draw_arrays(struct pipe_context *pipe,
162 unsigned mode, unsigned start, unsigned count)
163 {
164 struct nvfx_context *nvfx = nvfx_context(pipe);
165 struct nvfx_screen *screen = nvfx->screen;
166 struct nouveau_channel *chan = screen->base.channel;
167 unsigned restart = 0;
168
169 nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
170 if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
171 nvfx_draw_elements_swtnl(pipe, NULL, 0,
172 mode, start, count);
173 return;
174 }
175
176 while (count) {
177 unsigned vc, nr;
178
179 nvfx_state_emit(nvfx);
180
181 unsigned avail = AVAIL_RING(chan);
182 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
183
184 vc = nouveau_vbuf_split(avail, 6, 256,
185 mode, start, count, &restart);
186 if (!vc) {
187 FIRE_RING(chan);
188 continue;
189 }
190
191 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
192 OUT_RING (chan, nvgl_primitive(mode));
193
194 nr = (vc & 0xff);
195 if (nr) {
196 OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
197 OUT_RING (chan, ((nr - 1) << 24) | start);
198 start += nr;
199 }
200
201 nr = vc >> 8;
202 while (nr) {
203 unsigned push = nr > 2047 ? 2047 : nr;
204
205 nr -= push;
206
207 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
208 while (push--) {
209 OUT_RING(chan, ((0x100 - 1) << 24) | start);
210 start += 0x100;
211 }
212 }
213
214 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
215 OUT_RING (chan, 0);
216
217 count -= vc;
218 start = restart;
219 }
220
221 pipe->flush(pipe, 0, NULL);
222 }
223
224 static INLINE void
225 nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
226 unsigned mode, unsigned start, unsigned count)
227 {
228 struct nvfx_screen *screen = nvfx->screen;
229 struct nouveau_channel *chan = screen->base.channel;
230
231 while (count) {
232 uint8_t *elts = (uint8_t *)ib + start;
233 unsigned vc, push, restart = 0;
234
235 nvfx_state_emit(nvfx);
236
237 unsigned avail = AVAIL_RING(chan);
238 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
239
240 vc = nouveau_vbuf_split(avail, 6, 2,
241 mode, start, count, &restart);
242 if (vc == 0) {
243 FIRE_RING(chan);
244 continue;
245 }
246 count -= vc;
247
248 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
249 OUT_RING (chan, nvgl_primitive(mode));
250
251 if (vc & 1) {
252 OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
253 OUT_RING (chan, elts[0]);
254 elts++; vc--;
255 }
256
257 while (vc) {
258 unsigned i;
259
260 push = MIN2(vc, 2047 * 2);
261
262 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
263 for (i = 0; i < push; i+=2)
264 OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
265
266 vc -= push;
267 elts += push;
268 }
269
270 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
271 OUT_RING (chan, 0);
272
273 start = restart;
274 }
275 }
276
277 static INLINE void
278 nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
279 unsigned mode, unsigned start, unsigned count)
280 {
281 struct nvfx_screen *screen = nvfx->screen;
282 struct nouveau_channel *chan = screen->base.channel;
283
284 while (count) {
285 uint16_t *elts = (uint16_t *)ib + start;
286 unsigned vc, push, restart = 0;
287
288 nvfx_state_emit(nvfx);
289
290 unsigned avail = AVAIL_RING(chan);
291 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
292
293 vc = nouveau_vbuf_split(avail, 6, 2,
294 mode, start, count, &restart);
295 if (vc == 0) {
296 FIRE_RING(chan);
297 continue;
298 }
299 count -= vc;
300
301 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
302 OUT_RING (chan, nvgl_primitive(mode));
303
304 if (vc & 1) {
305 OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
306 OUT_RING (chan, elts[0]);
307 elts++; vc--;
308 }
309
310 while (vc) {
311 unsigned i;
312
313 push = MIN2(vc, 2047 * 2);
314
315 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
316 for (i = 0; i < push; i+=2)
317 OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
318
319 vc -= push;
320 elts += push;
321 }
322
323 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
324 OUT_RING (chan, 0);
325
326 start = restart;
327 }
328 }
329
330 static INLINE void
331 nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
332 unsigned mode, unsigned start, unsigned count)
333 {
334 struct nvfx_screen *screen = nvfx->screen;
335 struct nouveau_channel *chan = screen->base.channel;
336
337 while (count) {
338 uint32_t *elts = (uint32_t *)ib + start;
339 unsigned vc, push, restart = 0;
340
341 nvfx_state_emit(nvfx);
342
343 unsigned avail = AVAIL_RING(chan);
344 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
345
346 vc = nouveau_vbuf_split(avail, 5, 1,
347 mode, start, count, &restart);
348 if (vc == 0) {
349 FIRE_RING(chan);
350 continue;
351 }
352 count -= vc;
353
354 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
355 OUT_RING (chan, nvgl_primitive(mode));
356
357 while (vc) {
358 push = MIN2(vc, 2047);
359
360 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
361 OUT_RINGp (chan, elts, push);
362
363 vc -= push;
364 elts += push;
365 }
366
367 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
368 OUT_RING (chan, 0);
369
370 start = restart;
371 }
372 }
373
374 static void
375 nvfx_draw_elements_inline(struct pipe_context *pipe,
376 struct pipe_resource *ib, unsigned ib_size,
377 unsigned mode, unsigned start, unsigned count)
378 {
379 struct nvfx_context *nvfx = nvfx_context(pipe);
380 struct pipe_transfer *transfer;
381 void *map;
382
383 map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
384 if (!ib) {
385 NOUVEAU_ERR("failed mapping ib\n");
386 return;
387 }
388
389 switch (ib_size) {
390 case 1:
391 nvfx_draw_elements_u08(nvfx, map, mode, start, count);
392 break;
393 case 2:
394 nvfx_draw_elements_u16(nvfx, map, mode, start, count);
395 break;
396 case 4:
397 nvfx_draw_elements_u32(nvfx, map, mode, start, count);
398 break;
399 default:
400 NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
401 break;
402 }
403
404 pipe_buffer_unmap(pipe, ib, transfer);
405 }
406
407 static void
408 nvfx_draw_elements_vbo(struct pipe_context *pipe,
409 unsigned mode, unsigned start, unsigned count)
410 {
411 struct nvfx_context *nvfx = nvfx_context(pipe);
412 struct nvfx_screen *screen = nvfx->screen;
413 struct nouveau_channel *chan = screen->base.channel;
414 unsigned restart = 0;
415
416 while (count) {
417 unsigned nr, vc;
418
419 nvfx_state_emit(nvfx);
420
421 unsigned avail = AVAIL_RING(chan);
422 avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
423
424 vc = nouveau_vbuf_split(avail, 6, 256,
425 mode, start, count, &restart);
426 if (!vc) {
427 FIRE_RING(chan);
428 continue;
429 }
430
431 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
432 OUT_RING (chan, nvgl_primitive(mode));
433
434 nr = (vc & 0xff);
435 if (nr) {
436 OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
437 OUT_RING (chan, ((nr - 1) << 24) | start);
438 start += nr;
439 }
440
441 nr = vc >> 8;
442 while (nr) {
443 unsigned push = nr > 2047 ? 2047 : nr;
444
445 nr -= push;
446
447 OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
448 while (push--) {
449 OUT_RING(chan, ((0x100 - 1) << 24) | start);
450 start += 0x100;
451 }
452 }
453
454 OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
455 OUT_RING (chan, 0);
456
457 count -= vc;
458 start = restart;
459 }
460 }
461
462 void
463 nvfx_draw_elements(struct pipe_context *pipe,
464 struct pipe_resource *indexBuffer, unsigned indexSize,
465 unsigned mode, unsigned start, unsigned count)
466 {
467 struct nvfx_context *nvfx = nvfx_context(pipe);
468 boolean idxbuf;
469
470 idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
471 if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
472 nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize,
473 mode, start, count);
474 return;
475 }
476
477 if (idxbuf) {
478 nvfx_draw_elements_vbo(pipe, mode, start, count);
479 } else {
480 nvfx_draw_elements_inline(pipe, indexBuffer, indexSize,
481 mode, start, count);
482 }
483
484 pipe->flush(pipe, 0, NULL);
485 }
486
487 boolean
488 nvfx_vbo_validate(struct nvfx_context *nvfx)
489 {
490 struct nouveau_channel* chan = nvfx->screen->base.channel;
491 struct pipe_resource *ib = nvfx->idxbuf;
492 unsigned ib_format = nvfx->idxbuf_format;
493 int i;
494 int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
495 uint32_t vtxfmt[16];
496 unsigned vb_flags = nvfx->screen->vertex_buffer_flags | NOUVEAU_BO_RD;
497
498 if (!elements)
499 return TRUE;
500
501 nvfx->vbo_bo = 0;
502
503 MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
504 for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
505 struct pipe_vertex_element *ve;
506 struct pipe_vertex_buffer *vb;
507 unsigned type, ncomp;
508
509 ve = &nvfx->vtxelt->pipe[i];
510 vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
511
512 if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
513 MARK_UNDO(chan);
514 nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
515 return FALSE;
516 }
517
518 if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
519 nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
520 vtxfmt[i] = type;
521 } else {
522 vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
523 (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
524 nvfx->vbo_bo |= (1 << i);
525 }
526 }
527
528 for(; i < elements; ++i)
529 vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
530
531 OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
532 OUT_RINGp(chan, vtxfmt, elements);
533
534 if(nvfx->is_nv4x) {
535 unsigned i;
536 /* seems to be some kind of cache flushing */
537 for(i = 0; i < 3; ++i) {
538 OUT_RING(chan, RING_3D(0x1718, 1));
539 OUT_RING(chan, 0);
540 }
541 }
542
543 OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
544 for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
545 struct pipe_vertex_element *ve;
546 struct pipe_vertex_buffer *vb;
547
548 ve = &nvfx->vtxelt->pipe[i];
549 vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
550
551 if (!(nvfx->vbo_bo & (1 << i)))
552 OUT_RING(chan, 0);
553 else
554 {
555 struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
556 OUT_RELOC(chan, bo,
557 vb->buffer_offset + ve->src_offset,
558 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
559 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
560 }
561 }
562
563 for (; i < elements; i++)
564 OUT_RING(chan, 0);
565
566 OUT_RING(chan, RING_3D(0x1710, 1));
567 OUT_RING(chan, 0);
568
569 if (ib) {
570 struct nouveau_bo* bo = nvfx_resource(ib)->bo;
571
572 OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
573 OUT_RELOC(chan, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
574 OUT_RELOC(chan, bo, ib_format, vb_flags | NOUVEAU_BO_OR,
575 0, NV34TCL_IDXBUF_FORMAT_DMA1);
576 }
577
578 nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
579 return TRUE;
580 }
581
582 void
583 nvfx_vbo_relocate(struct nvfx_context *nvfx)
584 {
585 struct nouveau_channel* chan = nvfx->screen->base.channel;
586 unsigned vb_flags = nvfx->screen->vertex_buffer_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
587 int i;
588
589 MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
590 for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
591 if(nvfx->vbo_bo & (1 << i)) {
592 struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
593 struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
594 struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
595 OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
596 vb_flags, 0, 0);
597 OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
598 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
599 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
600 }
601 }
602
603 if(nvfx->idxbuf)
604 {
605 struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo;
606
607 OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
608 vb_flags, 0, 0);
609 OUT_RELOC(chan, bo, 0,
610 vb_flags | NOUVEAU_BO_LOW, 0, 0);
611 OUT_RELOC(chan, bo, nvfx->idxbuf_format,
612 vb_flags | NOUVEAU_BO_OR,
613 0, NV34TCL_IDXBUF_FORMAT_DMA1);
614 }
615 }