2 * Copyright 2008 Ben Skeggs
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "util/u_inlines.h"
26 #include "util/u_format.h"
27 #include "util/u_split_prim.h"
29 #include "nv50_context.h"
30 #include "nv50_resource.h"
/* NOTE(review): fragment of the per-element instancing bookkeeping struct
 * ("struct instance", used as 'a[16]' in the draw functions below).  Only
 * this one field survives the extraction; the struct header and its other
 * fields (divisor, delta, stride, step — inferred from usage below) are
 * missing from view.  Code reproduced verbatim. */
33 struct nouveau_bo
*bo
;
/* instance_init() - populate the instancing state array 'a' (one entry per
 * bound vertex element) for a draw whose first instance is 'first'.
 * For each element it records the instance divisor, the backing nouveau_bo,
 * the buffer stride, the step phase within the divisor period
 * (first % divisor) and 'delta', the byte offset of instance 'first' data
 * (buffer_offset + src_offset + first * stride).
 * NOTE(review): garbled extraction — the opening brace, the declaration of
 * 'i', and at least original line 50 (presumably a guard on a zero divisor,
 * since 'first % a[i].divisor' would otherwise divide by zero for
 * non-instanced elements — TODO confirm) are missing from view.
 * Code lines are reproduced verbatim. */
41 instance_init(struct nv50_context
*nv50
, struct instance
*a
, unsigned first
)
45 for (i
= 0; i
< nv50
->vtxelt
->num_elements
; i
++) {
46 struct pipe_vertex_element
*ve
= &nv50
->vtxelt
->pipe
[i
];
47 struct pipe_vertex_buffer
*vb
;
/* record the divisor; the block below fills the remaining fields from the
 * vertex buffer this element sources from */
49 a
[i
].divisor
= ve
->instance_divisor
;
51 vb
= &nv50
->vtxbuf
[ve
->vertex_buffer_index
];
53 a
[i
].bo
= nv50_resource(vb
->buffer
)->bo
;
54 a
[i
].stride
= vb
->stride
;
55 a
[i
].step
= first
% a
[i
].divisor
;
56 a
[i
].delta
= vb
->buffer_offset
+ ve
->src_offset
+
57 (first
* a
[i
].stride
);
/* instance_step() - emit, for the current instance, the per-element vertex
 * array start addresses (VERTEX_ARRAY_START_HIGH/LOW relocations against
 * a[i].bo + a[i].delta), then advance the instancing state: when the step
 * counter reaches the divisor, 'delta' moves forward by one stride.
 * NOTE(review): garbled extraction — the opening brace, the declaration of
 * 'i', a likely "skip when divisor == 0" branch (original lines 70-72 are
 * missing — TODO confirm), the reset of a[i].step inside the divisor test
 * (original line 80), and the closing braces are all missing from view.
 * Code lines are reproduced verbatim. */
63 instance_step(struct nv50_context
*nv50
, struct instance
*a
)
65 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
66 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
69 for (i
= 0; i
< nv50
->vtxelt
->num_elements
; i
++) {
/* point array i at this instance's data: high then low 32 bits of the
 * GPU address, readable from VRAM or GART */
73 BEGIN_RING(chan
, tesla
,
74 NV50TCL_VERTEX_ARRAY_START_HIGH(i
), 2);
75 OUT_RELOCh(chan
, a
[i
].bo
, a
[i
].delta
, NOUVEAU_BO_RD
|
76 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
);
77 OUT_RELOCl(chan
, a
[i
].bo
, a
[i
].delta
, NOUVEAU_BO_RD
|
78 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
);
/* one full divisor period consumed: advance to next element of the
 * instanced array (step reset line is missing from this extraction) */
79 if (++a
[i
].step
== a
[i
].divisor
) {
81 a
[i
].delta
+= a
[i
].stride
;
/* nv50_draw_arrays_instanced() - instanced non-indexed draw.
 * Falls back to the software push path (nv50_push_elements_instanced) when
 * nv50->vbo_fifo is set; otherwise writes the start instance ID into the
 * auxiliary constant buffer (NV50_CB_AUX, slot 24) and loops over
 * instances, each iteration re-pointing the instanced arrays
 * (instance_step) and emitting VERTEX_BEGIN / VERTEX_BUFFER_FIRST+COUNT /
 * VERTEX_END for [start, start+count).
 * NOTE(review): garbled extraction — the function's braces, early-return
 * on failed state validation (after original line 98), the FIRE_RING /
 * revalidation body inside the AVAIL_RING low-space branch (original
 * lines 113-118 — TODO confirm), the second CB_ADDR instance-ID update
 * presumably inside the loop, and trailing OUT_RING(chan, 0) after
 * VERTEX_END are missing from view.  Code reproduced verbatim. */
87 nv50_draw_arrays_instanced(struct pipe_context
*pipe
,
88 unsigned mode
, unsigned start
, unsigned count
,
89 unsigned startInstance
, unsigned instanceCount
)
91 struct nv50_context
*nv50
= nv50_context(pipe
);
92 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
93 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
94 struct instance a
[16];
95 unsigned prim
= nv50_prim(mode
);
97 instance_init(nv50
, a
, startInstance
);
/* 10 + 16*3: presumably dwords of pushbuf space needed for state plus
 * worst-case 16 array repoints — TODO confirm against validate contract */
98 if (!nv50_state_validate(nv50
, 10 + 16*3))
101 if (nv50
->vbo_fifo
) {
102 nv50_push_elements_instanced(pipe
, NULL
, 0, 0, mode
, start
,
103 count
, startInstance
,
/* tell the vertex shader the starting instance ID via the aux cbuf */
108 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
109 OUT_RING  (chan
, NV50_CB_AUX
| (24 << 8));
110 OUT_RING  (chan
, startInstance
);
111 while (instanceCount
--) {
/* make sure one instance's worth of commands fits in the pushbuf */
112 if (AVAIL_RING(chan
) < (7 + 16*3)) {
114 if (!nv50_state_validate(nv50
, 7 + 16*3)) {
119 instance_step(nv50
, a
);
121 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
122 OUT_RING  (chan
, prim
);
123 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BUFFER_FIRST
, 2);
124 OUT_RING  (chan
, start
);
125 OUT_RING  (chan
, count
);
126 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
/* NOTE(review): fragment of "struct inline_ctx", the private context handed
 * to the inline_elt* / inline_edgeflag callbacks below via their void*
 * 'priv' argument.  Only this field is visible; a 'map' member (the mapped
 * index buffer, dereferenced by the callbacks) is missing from view. */
134 struct nv50_context
*nv50
;
/* inline_elt08() - util_split_prim emit callback for 8-bit indices.
 * Reads 'count' uint8 indices starting at ctx->map + start and pushes them
 * inline: an odd leading index goes out alone as a VB_ELEMENT_U32, the
 * remainder is packed two-per-dword into a VB_ELEMENT_U16 NI burst
 * (low half = first index, high half = second).
 * NOTE(review): garbled extraction — the braces, the odd-count test around
 * the U32 emission (original lines ~146-152), the count adjustment, and
 * the loop advancing 'map' around the packed OUT_RING (original line 158+)
 * are missing from view.  Code reproduced verbatim. */
139 inline_elt08(void *priv
, unsigned start
, unsigned count
)
141 struct inline_ctx
*ctx
= priv
;
142 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
143 struct nouveau_channel
*chan
= tesla
->channel
;
144 uint8_t *map
= (uint8_t *)ctx
->map
+ start
;
/* lone (unpaired) index emitted as a full 32-bit element */
147 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, 1);
148 OUT_RING  (chan
, map
[0]);
/* remaining indices packed two per dword */
157 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
, count
);
159 OUT_RING(chan
, (map
[1] << 16) | map
[0]);
/* inline_elt16() - util_split_prim emit callback for 16-bit indices.
 * Same scheme as inline_elt08 but reading uint16_t from ctx->map + start:
 * an unpaired index goes out as VB_ELEMENT_U32, pairs are packed into a
 * VB_ELEMENT_U16 NI burst (low half first index, high half second).
 * NOTE(review): garbled extraction — braces, the odd-count branch around
 * the U32 emission, the count adjustment, and the loop advancing 'map'
 * around the packed OUT_RING are missing from view.  Code reproduced
 * verbatim. */
165 inline_elt16(void *priv
, unsigned start
, unsigned count
)
167 struct inline_ctx
*ctx
= priv
;
168 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
169 struct nouveau_channel
*chan
= tesla
->channel
;
170 uint16_t *map
= (uint16_t *)ctx
->map
+ start
;
/* lone (unpaired) index emitted as a full 32-bit element */
173 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, 1);
174 OUT_RING  (chan
, map
[0]);
/* remaining indices packed two per dword */
183 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
, count
);
185 OUT_RING(chan
, (map
[1] << 16) | map
[0]);
/* inline_elt32() - util_split_prim emit callback for 32-bit indices.
 * The simple case: 'count' uint32 indices starting at ctx->map + start are
 * streamed directly into a VB_ELEMENT_U32 non-incrementing burst with a
 * single OUT_RINGp bulk copy.
 * NOTE(review): garbled extraction — the function's braces are missing
 * from view.  Code reproduced verbatim. */
191 inline_elt32(void *priv
, unsigned start
, unsigned count
)
193 struct inline_ctx
*ctx
= priv
;
194 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
195 struct nouveau_channel
*chan
= tesla
->channel
;
197 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, count
);
198 OUT_RINGp (chan
, (uint32_t *)ctx
->map
+ start
, count
);
/* inline_edgeflag() - util_split_prim edge callback: toggles the hardware
 * edge-flag state (NV50TCL_EDGEFLAG_ENABLE, 1 or 0) as the splitter crosses
 * edge-flag boundaries while emitting inline indices.
 * NOTE(review): garbled extraction — the function's braces are missing
 * from view.  Code reproduced verbatim. */
202 inline_edgeflag(void *priv
, boolean enabled
)
204 struct inline_ctx
*ctx
= priv
;
205 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
206 struct nouveau_channel
*chan
= tesla
->channel
;
208 BEGIN_RING(chan
, tesla
, NV50TCL_EDGEFLAG_ENABLE
, 1);
209 OUT_RING  (chan
, enabled
? 1 : 0);
/* nv50_draw_elements_inline() - indexed instanced draw with indices pushed
 * inline through the FIFO (used when the index buffer is not GPU-mapped or
 * uses 8-bit indices; see caller).  Maps the index buffer for CPU reading,
 * selects the per-index-size emit callback (inline_elt08/16/32) and the
 * edge-flag callback for util_split_prim, writes the start instance ID to
 * the aux constant buffer, then for each instance splits the primitive into
 * pushbuf-sized chunks: each chunk re-validates/sizes ring space, repoints
 * instanced arrays (instance_step), and brackets the emitted indices with
 * VERTEX_BEGIN (bit 28 = continuation of a split primitive, per the 'nzi'
 * flag) and VERTEX_END.
 * NOTE(review): garbled extraction — braces; declarations of 'overhead',
 * 'max_verts', 'nzi', 'done' and the index-size switch/if selecting s.emit;
 * the early-return on failed validation; the FIRE_RING path in the
 * low-ring-space branch; the loop structure around the split ('done'
 * consumption, nzi update); and the final unmap ordering are all missing
 * from view.  Code reproduced verbatim. */
213 nv50_draw_elements_inline(struct pipe_context
*pipe
,
214 struct pipe_resource
*indexBuffer
, unsigned indexSize
,
215 unsigned mode
, unsigned start
, unsigned count
,
216 unsigned startInstance
, unsigned instanceCount
)
218 struct nv50_context
*nv50
= nv50_context(pipe
);
219 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
220 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
221 struct pipe_transfer
*transfer
;
222 struct instance a
[16];
223 struct inline_ctx ctx
;
224 struct util_split_prim s
;
/* worst-case pushbuf dwords consumed per chunk besides the indices
 * themselves; used to size AVAIL_RING checks and max_verts below */
228 overhead
= 16*3; /* potential instance adjustments */
229 overhead
+= 4; /* Begin()/End() */
230 overhead
+= 4; /* potential edgeflag disable/reenable */
231 overhead
+= 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
/* pick the emit callback matching the index element size (selection
 * conditionals are missing from this extraction) */
235 s
.emit
= inline_elt08
;
238 s
.emit
= inline_elt16
;
240 s
.emit
= inline_elt32
;
241 s
.edge
= inline_edgeflag
;
244 ctx
.map
= pipe_buffer_map(pipe
, indexBuffer
, PIPE_TRANSFER_READ
, &transfer
);
249 instance_init(nv50
, a
, startInstance
);
250 if (!nv50_state_validate(nv50
, overhead
+ 6 + 3))
/* start instance ID into the aux constant buffer, slot 24 */
253 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
254 OUT_RING  (chan
, NV50_CB_AUX
| (24 << 8));
255 OUT_RING  (chan
, startInstance
);
256 while (instanceCount
--) {
260 util_split_prim_init(&s
, mode
, start
, count
);
/* ensure room for at least one minimal chunk before emitting */
262 if (AVAIL_RING(chan
) < (overhead
+ 6)) {
264 if (!nv50_state_validate(nv50
, (overhead
+ 6))) {
/* cap vertices per chunk by remaining ring space (and 2047,
 * presumably a hardware/packet limit — TODO confirm) */
270 max_verts
= AVAIL_RING(chan
) - overhead
;
271 if (max_verts
> 2047)
275 instance_step(nv50
, a
);
277 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
278 OUT_RING  (chan
, nv50_prim(s
.mode
) | (nzi
? (1<<28) : 0));
279 done
= util_split_prim_next(&s
, max_verts
);
280 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
287 pipe_buffer_unmap(pipe
, transfer
);
/* nv50_draw_elements_instanced() - indexed instanced draw entry point.
 * Dispatch order: (1) software push path when nv50->vbo_fifo is set;
 * (2) inline index push (nv50_draw_elements_inline) when the index buffer
 * is not GPU-mapped or indices are 8-bit; (3) otherwise submit the index
 * buffer directly via pushbuf_submit, per instance, as VB_ELEMENT_U32
 * (dword indices, start/count scaled <<2) or VB_ELEMENT_U16 (word indices,
 * dword-aligned window [vb_start, vb_end) with U16_SETUP carrying the
 * start parity and count).  VB_ELEMENT_BASE is programmed with indexBias
 * first; the | 0x30000, size-0 method form hands the data portion of the
 * packet to the submitted buffer.
 * NOTE(review): garbled extraction — braces, early returns after the
 * fallback calls and failed validations, the FIRE_RING low-space handling
 * (original lines 330-335), the U16_SETUP reset after submission (value
 * after original line 359), and trailing OUT_RINGs are missing from view.
 * Code reproduced verbatim. */
291 nv50_draw_elements_instanced(struct pipe_context
*pipe
,
292 struct pipe_resource
*indexBuffer
,
293 unsigned indexSize
, int indexBias
,
294 unsigned mode
, unsigned start
, unsigned count
,
295 unsigned startInstance
, unsigned instanceCount
)
297 struct nv50_context
*nv50
= nv50_context(pipe
);
298 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
299 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
300 struct instance a
[16];
301 unsigned prim
= nv50_prim(mode
);
303 instance_init(nv50
, a
, startInstance
);
304 if (!nv50_state_validate(nv50
, 13 + 16*3))
307 if (nv50
->vbo_fifo
) {
308 nv50_push_elements_instanced(pipe
, indexBuffer
, indexSize
,
309 indexBias
, mode
, start
, count
,
310 startInstance
, instanceCount
);
314 /* indices are uint32 internally, so large indexBias means negative */
315 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_BASE
, 1);
316 OUT_RING  (chan
, indexBias
);
318 if (!nv50_resource_mapped_by_gpu(indexBuffer
) || indexSize
== 1) {
319 nv50_draw_elements_inline(pipe
, indexBuffer
, indexSize
,
320 mode
, start
, count
, startInstance
,
/* GPU-side index pull path: start instance ID into the aux cbuf */
325 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
326 OUT_RING  (chan
, NV50_CB_AUX
| (24 << 8));
327 OUT_RING  (chan
, startInstance
);
328 while (instanceCount
--) {
329 if (AVAIL_RING(chan
) < (7 + 16*3)) {
331 if (!nv50_state_validate(nv50
, 10 + 16*3)) {
336 instance_step(nv50
, a
);
338 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
339 OUT_RING  (chan
, prim
);
340 if (indexSize
== 4) {
/* size-0 | 0x30000 header: index data supplied by the submitted
 * buffer rather than inline dwords */
341 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
| 0x30000, 0);
342 OUT_RING  (chan
, count
);
343 nouveau_pushbuf_submit(chan
,
344 nv50_resource(indexBuffer
)->bo
,
345 start
<< 2, count
<< 2);
347 if (indexSize
== 2) {
/* dword-align the 16-bit index window; U16_SETUP records the odd
 * start bit and the real element count */
348 unsigned vb_start
= (start
& ~1);
349 unsigned vb_end
= (start
+ count
+ 1) & ~1;
350 unsigned dwords
= (vb_end
- vb_start
) >> 1;
352 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U16_SETUP
, 1);
353 OUT_RING  (chan
, ((start
& 1) << 31) | count
);
354 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
| 0x30000, 0);
355 OUT_RING  (chan
, dwords
);
356 nouveau_pushbuf_submit(chan
,
357 nv50_resource(indexBuffer
)->bo
,
358 vb_start
<< 1, dwords
<< 2);
359 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U16_SETUP
, 1);
362 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
/* nv50_draw_vbo() - the pipe_context::draw_vbo hook.  Routes a draw to the
 * indexed path (nv50_draw_elements_instanced) when info->indexed and an
 * index buffer is bound — converting the bound buffer's byte offset into
 * an element offset added to info->start — and to
 * nv50_draw_arrays_instanced otherwise.  Both callees receive
 * info->start_instance / info->instance_count, so non-instanced draws are
 * just the instanceCount == 1 case.
 * NOTE(review): garbled extraction — braces, the declaration of 'offset',
 * and several argument lines of the two calls (index buffer pointer,
 * index bias, mode, count — original lines 381/383/384/386 and 392-394)
 * are missing from view.  Code reproduced verbatim. */
370 nv50_draw_vbo(struct pipe_context
*pipe
, const struct pipe_draw_info
*info
)
372 struct nv50_context
*nv50
= nv50_context(pipe
);
374 if (info
->indexed
&& nv50
->idxbuf
.buffer
) {
/* the bound offset must be a whole number of index elements */
377 assert(nv50
->idxbuf
.offset
% nv50
->idxbuf
.index_size
== 0);
378 offset
= nv50
->idxbuf
.offset
/ nv50
->idxbuf
.index_size
;
380 nv50_draw_elements_instanced(pipe
,
382 nv50
->idxbuf
.index_size
,
385 info
->start
+ offset
,
387 info
->start_instance
,
388 info
->instance_count
);
391 nv50_draw_arrays_instanced(pipe
,
395 info
->start_instance
,
396 info
->instance_count
);
/* nv50_vbo_static_attrib() - handle a constant (stride 0) vertex attribute
 * by reading its single value from the mapped buffer object and baking it
 * into a new stateobj (*pso) as immediate VTX_ATTR_{4,3,2,1}F methods,
 * chosen by the format's component count.  A 1-component attribute that is
 * the vertex program's edge-flag input becomes an EDGEFLAG_ENABLE toggle
 * instead.  Returns boolean success (TRUE presumably when the constant
 * value was emitted — the return statements are not visible here).
 * NOTE(review): garbled extraction — braces; declarations of 'ret' and the
 * float array 'v'; the check of nouveau_bo_map's return; the trailing
 * arguments of util_format_read_4f; the case labels/breaks and default of
 * the switch; and the returns near both nouveau_bo_unmap calls are missing
 * from view.  Code reproduced verbatim. */
400 static INLINE boolean
401 nv50_vbo_static_attrib(struct nv50_context
*nv50
, unsigned attrib
,
402 struct nouveau_stateobj
**pso
,
403 struct pipe_vertex_element
*ve
,
404 struct pipe_vertex_buffer
*vb
)
407 struct nouveau_stateobj
*so
;
408 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
409 struct nouveau_bo
*bo
= nv50_resource(vb
->buffer
)->bo
;
412 unsigned nr_components
= util_format_get_nr_components(ve
->src_format
);
/* map the buffer and decode the single attribute value into v[] */
414 ret
= nouveau_bo_map(bo
, NOUVEAU_BO_RD
);
418 util_format_read_4f(ve
->src_format
, v
, 0, (uint8_t *)bo
->map
+
419 (vb
->buffer_offset
+ ve
->src_offset
), 0,
423 *pso
= so
= so_new(nv50
->vtxelt
->num_elements
,
424 nv50
->vtxelt
->num_elements
* 4, 0);
/* emit the constant as an immediate vertex attribute of matching width */
426 switch (nr_components
) {
428 so_method(so
, tesla
, NV50TCL_VTX_ATTR_4F_X(attrib
), 4);
429 so_data  (so
, fui(v
[0]));
430 so_data  (so
, fui(v
[1]));
431 so_data  (so
, fui(v
[2]));
432 so_data  (so
, fui(v
[3]));
435 so_method(so
, tesla
, NV50TCL_VTX_ATTR_3F_X(attrib
), 3);
436 so_data  (so
, fui(v
[0]));
437 so_data  (so
, fui(v
[1]));
438 so_data  (so
, fui(v
[2]));
441 so_method(so
, tesla
, NV50TCL_VTX_ATTR_2F_X(attrib
), 2);
442 so_data  (so
, fui(v
[0]));
443 so_data  (so
, fui(v
[1]));
/* a scalar attribute feeding the edge flag becomes a state toggle */
446 if (attrib
== nv50
->vertprog
->vp
.edgeflag
) {
447 so_method(so
, tesla
, NV50TCL_EDGEFLAG_ENABLE
, 1);
448 so_data  (so
, v
[0] ? 1 : 0);
450 so_method(so
, tesla
, NV50TCL_VTX_ATTR_1F(attrib
), 1);
451 so_data  (so
, fui(v
[0]));
454 nouveau_bo_unmap(bo
);
458 nouveau_bo_unmap(bo
);
/* nv50_vtxelt_construct() - precompute the hardware vertex-attribute format
 * word for each element of a vertex-elements CSO by looking up the Gallium
 * src_format in nv50_format_table and caching the .vtx encoding in
 * cso->hw[i] (consumed later by nv50_vbo_validate).
 * NOTE(review): garbled extraction — the function's braces and the
 * declaration of 'i' are missing from view.  Code reproduced verbatim. */
463 nv50_vtxelt_construct(struct nv50_vtxelt_stateobj
*cso
)
467 for (i
= 0; i
< cso
->num_elements
; ++i
)
468 cso
->hw
[i
] = nv50_format_table
[cso
->pipe
[i
].src_format
].vtx
;
/* nv50_vbo_validate() - build the vertex array / vertex format stateobjs
 * for the current vertex-element and vertex-buffer bindings and stash them
 * in nv50->state; the visible so_ref/so_new pattern suggests the vtxfmt
 * object is what gets returned — TODO confirm, the return statement is not
 * visible.  Decision flow per element:
 *   - force the software push path (vbo_fifo = 0xffff) when the screen
 *     demands it, when the vertex program consumes an edge flag, or when
 *     any strided vertex buffer is not GPU-mapped;
 *   - a stride-0 element that nv50_vbo_static_attrib() can bake becomes an
 *     immediate attribute: format word gets bit 4 (const), its array slot
 *     is disabled, and the element is cleared from the vbo_fifo mask;
 *   - under vbo_fifo, arrays are likewise disabled (bit 4 unless
 *     instanced) since vertices will be pushed through the FIFO;
 *   - otherwise a real hardware array is programmed: enable bit 0x20000000
 *     plus stride (0 for instanced — stepping is done per instance in
 *     instance_step), start address relocations, and LIMIT_HIGH/LOW
 *     relocations at width0 - 1;
 *   - leftover slots up to n_ve (arrays bound on a previous validate) are
 *     written with the 0x7e080010 placeholder format and disabled.
 * NOTE(review): garbled extraction — braces; declarations of 'i' and
 * 'n_ve'; the early return for vtxbuf_nr == 0; the vbo_fifo reset before
 * the loops; the stride-0 condition guarding the static-attrib call
 * (original lines 506-507); several so_data disable words following
 * so_method calls; the vtxattr initialization; and the final
 * return/so_ref ordering are missing from view.  Code reproduced
 * verbatim. */
471 struct nouveau_stateobj
*
472 nv50_vbo_validate(struct nv50_context
*nv50
)
474 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
475 struct nouveau_stateobj
*vtxbuf
, *vtxfmt
, *vtxattr
;
478 /* don't validate if Gallium took away our buffers */
479 if (nv50
->vtxbuf_nr
== 0)
/* screen-wide push mode, or an edge-flag-consuming vertex program,
 * forces every element through the FIFO path */
483 if (nv50
->screen
->force_push
||
484 nv50
->vertprog
->vp
.edgeflag
< 16)
485 nv50
->vbo_fifo
= 0xffff;
487 for (i
= 0; i
< nv50
->vtxbuf_nr
; i
++) {
488 if (nv50
->vtxbuf
[i
].stride
&&
489 !nv50_resource_mapped_by_gpu(nv50
->vtxbuf
[i
].buffer
))
490 nv50
->vbo_fifo
= 0xffff;
/* size state for the larger of current and previously-bound counts so
 * stale array slots can be explicitly disabled below */
493 n_ve
= MAX2(nv50
->vtxelt
->num_elements
, nv50
->state
.vtxelt_nr
);
496 vtxbuf
= so_new(n_ve
* 2, n_ve
* 5, nv50
->vtxelt
->num_elements
* 4);
497 vtxfmt
= so_new(1, n_ve
, 0);
498 so_method(vtxfmt
, tesla
, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve
);
500 for (i
= 0; i
< nv50
->vtxelt
->num_elements
; i
++) {
501 struct pipe_vertex_element
*ve
= &nv50
->vtxelt
->pipe
[i
];
502 struct pipe_vertex_buffer
*vb
=
503 &nv50
->vtxbuf
[ve
->vertex_buffer_index
];
504 struct nouveau_bo
*bo
= nv50_resource(vb
->buffer
)->bo
;
505 uint32_t hw
= nv50
->vtxelt
->hw
[i
];
/* constant attribute baked as immediates: mark const (bit 4),
 * disable the array slot, drop it from the fifo-push mask */
508 nv50_vbo_static_attrib(nv50
, i
, &vtxattr
, ve
, vb
)) {
509 so_data(vtxfmt
, hw
| (1 << 4));
511 so_method(vtxbuf
, tesla
,
512 NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
515 nv50
->vbo_fifo
&= ~(1 << i
);
/* FIFO-push fallback: array disabled, vertices come inline */
519 if (nv50
->vbo_fifo
) {
520 so_data (vtxfmt
, hw
| (ve
->instance_divisor
? (1 << 4) : i
));
521 so_method(vtxbuf
, tesla
,
522 NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
/* normal hardware vertex array */
527 so_data(vtxfmt
, hw
| i
);
529 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_FORMAT(i
), 3);
530 so_data  (vtxbuf
, 0x20000000 |
531 (ve
->instance_divisor
? 0 : vb
->stride
));
532 so_reloc (vtxbuf
, bo
, vb
->buffer_offset
+
533 ve
->src_offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
534 NOUVEAU_BO_RD
| NOUVEAU_BO_HIGH
, 0, 0);
535 so_reloc (vtxbuf
, bo
, vb
->buffer_offset
+
536 ve
->src_offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
537 NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
, 0, 0);
539 /* vertex array limits */
540 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i
), 2);
541 so_reloc (vtxbuf
, bo
, vb
->buffer
->width0
- 1,
542 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
|
543 NOUVEAU_BO_HIGH
, 0, 0);
544 so_reloc (vtxbuf
, bo
, vb
->buffer
->width0
- 1,
545 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
|
546 NOUVEAU_BO_LOW
, 0, 0);
/* disable array slots left over from a previous, larger binding */
548 for (; i
< n_ve
; ++i
) {
549 so_data  (vtxfmt
, 0x7e080010);
551 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
554 nv50
->state
.vtxelt_nr
= nv50
->vtxelt
->num_elements
;
/* hand the built stateobjs to nv50->state and drop local references */
556 so_ref (vtxbuf
, &nv50
->state
.vtxbuf
);
557 so_ref (vtxattr
, &nv50
->state
.vtxattr
);
558 so_ref (NULL
, &vtxbuf
);
559 so_ref (NULL
, &vtxattr
);