2 * Copyright 2008 Ben Skeggs
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "util/u_inlines.h"
26 #include "util/u_format.h"
28 #include "nouveau/nouveau_util.h"
29 #include "nv50_context.h"
/*
 * True when cfg.edgeflag_in of the vertex program bound to ctx is below 16.
 * NOTE(review): presumably this means the program reads an edge flag input
 * attribute - confirm against the code that sets cfg.edgeflag_in.
 */
31 #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
/*
 * Translate the type of the first channel of a gallium vertex format into
 * the matching NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_* value.
 *
 * format: pipe format whose util_format_description() is inspected.
 *
 * Float channels map to TYPE_FLOAT. Unsigned and signed channels map to the
 * normalized variant (UNORM / SNORM) when channel[0].normalized is set and
 * to the scaled variant (USCALED / SSCALED) otherwise.
 *
 * NOTE(review): the result for any other channel type is not visible in
 * this listing; nv50_vbo_vtxelt_to_hw() treats a zero result as unsupported.
 */
33 static INLINE
uint32_t
34 nv50_vbo_type_to_hw(enum pipe_format format
)
36 const struct util_format_description
*desc
;
38 desc
= util_format_description(format
);
/* Only channel 0 is examined; all channels are assumed to share its type. */
41 switch (desc
->channel
[0].type
) {
42 case UTIL_FORMAT_TYPE_FLOAT
:
43 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT
;
44 case UTIL_FORMAT_TYPE_UNSIGNED
:
45 if (desc
->channel
[0].normalized
) {
46 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM
;
48 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED
;
49 case UTIL_FORMAT_TYPE_SIGNED
:
50 if (desc
->channel
[0].normalized
) {
51 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM
;
53 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED
;
55 case PIPE_FORMAT_TYPE_UINT:
56 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
57 case PIPE_FORMAT_TYPE_SINT:
58 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
/*
 * Look up the NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_* value for a vertex
 * attribute layout.
 *
 * size: per-component size; the caller passes the bit width of component 0.
 * nr_c: number of components, asserted to be in the range 1..4.
 *
 * Returns hw_values[size + (nr_c - 1)].
 *
 * NOTE(review): the visible table holds twelve entries (8, 16 and 32 bit
 * components, one to four of each). How size is scaled into a table index,
 * and any range check on it, is not visible in this listing - confirm.
 */
64 static INLINE
uint32_t
65 nv50_vbo_size_to_hw(unsigned size
, unsigned nr_c
)
67 static const uint32_t hw_values
[] = {
/* 8 bit components, one to four of them */
69 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8
,
70 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8
,
71 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8
,
72 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8
,
/* 16 bit components */
73 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16
,
74 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16
,
75 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16
,
76 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16
,
/* 32 bit components */
78 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32
,
79 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32
,
80 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32
,
81 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32
};
83 /* we'd also have R11G11B10 and R10G10B10A2 */
85 assert(nr_c
> 0 && nr_c
<= 4);
/* The component count selects the entry within the group chosen by size. */
91 return hw_values
[size
+ (nr_c
- 1)];
/*
 * Build the hardware attribute word for one vertex element.
 *
 * ve: vertex element; only ve->src_format is read.
 *
 * The word is the OR of the type bits from nv50_vbo_type_to_hw() and the
 * size bits from nv50_vbo_size_to_hw(). If either helper returns zero the
 * format is reported through NOUVEAU_ERR as unsupported.
 *
 * NOTE(review): what happens after the error message (abort, fallback
 * value) is not visible in this listing.
 */
94 static INLINE
uint32_t
95 nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element
*ve
)
97 uint32_t hw_type
, hw_size
;
98 enum pipe_format pf
= ve
->src_format
;
99 const struct util_format_description
*desc
;
100 unsigned size
, nr_components
;
102 desc
= util_format_description(pf
);
/* Bit width of component 0 in the RGB colorspace, and the component count. */
105 size
= util_format_get_component_bits(pf
, UTIL_FORMAT_COLORSPACE_RGB
, 0);
106 nr_components
= util_format_get_nr_components(pf
);
108 hw_type
= nv50_vbo_type_to_hw(pf
);
109 hw_size
= nv50_vbo_size_to_hw(size
, nr_components
);
/* A zero from either helper marks a format that cannot be expressed. */
111 if (!hw_type
|| !hw_size
) {
112 NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf
));
/* A format whose first swizzle entry selects Z is treated as BGRA: set bit 31. */
117 if (desc
->swizzle
[0] == UTIL_FORMAT_SWIZZLE_Z
) /* BGRA */
118 hw_size
|= (1 << 31); /* no real swizzle bits :-( */
120 return (hw_type
| hw_size
);
/*
 * Member of the per-element instancing state (struct instance): the buffer
 * object the attribute is fetched from. instance_init() sets it from
 * nouveau_bo(vb->buffer). The remaining members used by instance_init()
 * (divisor, stride, step, delta) are not visible in this listing.
 */
124 struct nouveau_bo
*bo
;
/*
 * Fill the per-element instancing state for every bound vertex element.
 *
 * nv50:  context providing the vertex elements (vtxelt) and buffers (vtxbuf).
 * a:     output array with one entry per vertex element; the callers in
 *        this file pass an array of 16 entries.
 * first: index of the first instance to be drawn.
 *
 * NOTE(review): first % a[i].divisor is undefined for a zero divisor. A
 * guard on the divisor is not visible in this listing - confirm one exists.
 */
132 instance_init(struct nv50_context
*nv50
, struct instance
*a
, unsigned first
)
136 for (i
= 0; i
< nv50
->vtxelt
->num_elements
; i
++) {
137 struct pipe_vertex_element
*ve
= &nv50
->vtxelt
->pipe
[i
];
138 struct pipe_vertex_buffer
*vb
;
140 a
[i
].divisor
= ve
->instance_divisor
;
142 vb
= &nv50
->vtxbuf
[ve
->vertex_buffer_index
];
144 a
[i
].bo
= nouveau_bo(vb
->buffer
);
145 a
[i
].stride
= vb
->stride
;
/* Position inside the current divisor period for the first instance. */
146 a
[i
].step
= first
% a
[i
].divisor
;
/* Byte offset of the attribute: buffer offset, element offset, first * stride. */
147 a
[i
].delta
= vb
->buffer_offset
+ ve
->src_offset
+
148 (first
* a
[i
].stride
);
/*
 * Emit the vertex array start address of each element for the instance
 * about to be drawn, then advance the per-element instancing state.
 *
 * nv50: context providing the channel, the tesla object and the elements.
 * a:    state previously filled by instance_init().
 *
 * For each element the high and low halves of the address bo + delta are
 * written to NV50TCL_VERTEX_ARRAY_START_HIGH(i). Once step reaches divisor,
 * delta moves forward by one stride.
 *
 * NOTE(review): whether elements with a zero divisor are skipped, and the
 * reset of step, are not visible in this listing.
 */
154 instance_step(struct nv50_context
*nv50
, struct instance
*a
)
156 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
157 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
160 for (i
= 0; i
< nv50
->vtxelt
->num_elements
; i
++) {
/* Two data words: high and low part of the relocated start address. */
164 BEGIN_RING(chan
, tesla
,
165 NV50TCL_VERTEX_ARRAY_START_HIGH(i
), 2);
166 OUT_RELOCh(chan
, a
[i
].bo
, a
[i
].delta
, NOUVEAU_BO_RD
|
167 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
);
168 OUT_RELOCl(chan
, a
[i
].bo
, a
[i
].delta
, NOUVEAU_BO_RD
|
169 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
);
/* After divisor instances the attribute advances to its next value. */
170 if (++a
[i
].step
== a
[i
].divisor
) {
172 a
[i
].delta
+= a
[i
].stride
;
/*
 * Draw instanceCount instances of a non-indexed primitive range.
 *
 * pipe:          gallium context, converted with nv50_context().
 * mode:          gallium primitive type, converted with nv50_prim().
 * start, count:  vertex range written to NV50TCL_VERTEX_BUFFER_FIRST.
 * startInstance: first instance index; written after NV50TCL_CB_ADDR and
 *                passed to instance_init().
 * instanceCount: number of instances to draw.
 *
 * When nv50->vbo_fifo is set the draw goes through
 * nv50_push_elements_instanced() instead of the vertex array path.
 *
 * NOTE(review): the early-return statements after a failed
 * nv50_state_validate() and after the push path are not visible here.
 */
178 nv50_draw_arrays_instanced(struct pipe_context
*pipe
,
179 unsigned mode
, unsigned start
, unsigned count
,
180 unsigned startInstance
, unsigned instanceCount
)
182 struct nv50_context
*nv50
= nv50_context(pipe
);
183 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
184 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
/* One entry per possible vertex element. */
185 struct instance a
[16];
186 unsigned prim
= nv50_prim(mode
);
188 instance_init(nv50
, a
, startInstance
);
/* Validate state, asking for 10 + 16*3 words of ring space. */
189 if (!nv50_state_validate(nv50
, 10 + 16*3))
192 if (nv50
->vbo_fifo
) {
193 nv50_push_elements_instanced(pipe
, NULL
, 0, mode
, start
,
194 count
, startInstance
,
/* Write startInstance at NV50_CB_AUX | (24 << 8) through NV50TCL_CB_ADDR. */
199 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
200 OUT_RING (chan
, NV50_CB_AUX
| (24 << 8));
201 OUT_RING (chan
, startInstance
);
202 while (instanceCount
--) {
/* Revalidate when fewer than 7 + 16*3 words are left in the ring. */
203 if (AVAIL_RING(chan
) < (7 + 16*3)) {
205 if (!nv50_state_validate(nv50
, 7 + 16*3)) {
210 instance_step(nv50
, a
);
212 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
213 OUT_RING (chan
, prim
);
214 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BUFFER_FIRST
, 2);
215 OUT_RING (chan
, start
);
216 OUT_RING (chan
, count
);
217 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
/*
 * Non-instanced array draw: forwards to nv50_draw_arrays_instanced() with
 * startInstance 0 and instanceCount 1.
 */
225 nv50_draw_arrays(struct pipe_context
*pipe
, unsigned mode
, unsigned start
,
228 nv50_draw_arrays_instanced(pipe
, mode
, start
, count
, 0, 1);
/*
 * Member of struct inline_ctx: the context through which the inline_*
 * callbacks reach screen->tesla. The map member those callbacks read is
 * not visible in this listing.
 */
232 struct nv50_context
*nv50
;
/*
 * Emit callback for 8 bit indices taken from the mapped index buffer.
 *
 * priv:  struct inline_ctx pointer.
 * start: index of the first element inside ctx->map.
 * count: number of indices to emit.
 *
 * Indices are sent in pairs through NV50TCL_VB_ELEMENT_U16, two per data
 * word with the second index in the upper 16 bits.
 *
 * NOTE(review): the leading NV50TCL_VB_ELEMENT_U32 packet presumably sends
 * a single index when count is odd; its condition, the pointer and count
 * adjustments and the loop header are not visible in this listing.
 */
237 inline_elt08(void *priv
, unsigned start
, unsigned count
)
239 struct inline_ctx
*ctx
= priv
;
240 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
241 struct nouveau_channel
*chan
= tesla
->channel
;
242 uint8_t *map
= (uint8_t *)ctx
->map
+ start
;
/* Single index sent as a full 32 bit element. */
245 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, 1);
246 OUT_RING (chan
, map
[0]);
/* Non-incrementing packet carrying count data words of packed pairs. */
255 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
, count
);
257 OUT_RING(chan
, (map
[1] << 16) | map
[0]);
/*
 * Emit callback for 16 bit indices taken from the mapped index buffer.
 *
 * priv:  struct inline_ctx pointer.
 * start: index of the first element inside ctx->map.
 * count: number of indices to emit.
 *
 * Indices are sent in pairs through NV50TCL_VB_ELEMENT_U16, two per data
 * word with the second index in the upper 16 bits.
 *
 * NOTE(review): the leading NV50TCL_VB_ELEMENT_U32 packet presumably sends
 * a single index when count is odd; its condition, the pointer and count
 * adjustments and the loop header are not visible in this listing.
 */
263 inline_elt16(void *priv
, unsigned start
, unsigned count
)
265 struct inline_ctx
*ctx
= priv
;
266 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
267 struct nouveau_channel
*chan
= tesla
->channel
;
268 uint16_t *map
= (uint16_t *)ctx
->map
+ start
;
/* Single index sent as a full 32 bit element. */
271 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, 1);
272 OUT_RING (chan
, map
[0]);
/* Non-incrementing packet carrying count data words of packed pairs. */
281 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
, count
);
283 OUT_RING(chan
, (map
[1] << 16) | map
[0]);
/*
 * Emit callback for 32 bit indices: copies count indices, starting at
 * element start of ctx->map, into one non-incrementing
 * NV50TCL_VB_ELEMENT_U32 packet.
 *
 * priv:  struct inline_ctx pointer.
 * start: index of the first element inside ctx->map.
 * count: number of indices, which is also the packet length in words.
 */
289 inline_elt32(void *priv
, unsigned start
, unsigned count
)
291 struct inline_ctx
*ctx
= priv
;
292 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
293 struct nouveau_channel
*chan
= tesla
->channel
;
295 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, count
);
296 OUT_RINGp (chan
, (uint32_t *)ctx
->map
+ start
, count
);
/*
 * Edge flag callback: writes 1 or 0 to NV50TCL_EDGEFLAG_ENABLE depending on
 * enabled.
 *
 * priv:    struct inline_ctx pointer.
 * enabled: requested edge flag state.
 */
300 inline_edgeflag(void *priv
, boolean enabled
)
302 struct inline_ctx
*ctx
= priv
;
303 struct nouveau_grobj
*tesla
= ctx
->nv50
->screen
->tesla
;
304 struct nouveau_channel
*chan
= tesla
->channel
;
306 BEGIN_RING(chan
, tesla
, NV50TCL_EDGEFLAG_ENABLE
, 1);
307 OUT_RING (chan
, enabled
? 1 : 0);
/*
 * Indexed instanced draw that copies the indices from a CPU mapping of the
 * index buffer into the command stream.
 *
 * pipe:          gallium context.
 * indexBuffer:   buffer holding the indices; mapped for CPU read here and
 *                unmapped at the end.
 * indexSize:     size of one index.
 * mode:          gallium primitive type.
 * start, count:  index range handed to u_split_prim_init().
 * startInstance: first instance index.
 * instanceCount: number of instances to draw.
 *
 * Each instance is drawn through the u_split_prim helper, which emits
 * pieces through the inline_elt08/16/32 and inline_edgeflag callbacks.
 *
 * NOTE(review): the conditions selecting among the three emit callbacks
 * (presumably on indexSize), the inner splitting loop, the error paths and
 * the declarations of overhead, max_verts, nzi and done are not visible in
 * this listing.
 */
311 nv50_draw_elements_inline(struct pipe_context
*pipe
,
312 struct pipe_buffer
*indexBuffer
, unsigned indexSize
,
313 unsigned mode
, unsigned start
, unsigned count
,
314 unsigned startInstance
, unsigned instanceCount
)
316 struct pipe_screen
*pscreen
= pipe
->screen
;
317 struct nv50_context
*nv50
= nv50_context(pipe
);
318 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
319 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
320 struct instance a
[16];
321 struct inline_ctx ctx
;
322 struct u_split_prim s
;
/* Ring words that are reserved and cannot be used for index data. */
326 overhead
= 16*3; /* potential instance adjustments */
327 overhead
+= 4; /* Begin()/End() */
328 overhead
+= 4; /* potential edgeflag disable/reenable */
329 overhead
+= 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
/* Callback that emits a run of indices for the current index size. */
333 s
.emit
= inline_elt08
;
336 s
.emit
= inline_elt16
;
338 s
.emit
= inline_elt32
;
339 s
.edge
= inline_edgeflag
;
342 ctx
.map
= pipe_buffer_map(pscreen
, indexBuffer
, PIPE_BUFFER_USAGE_CPU_READ
);
347 instance_init(nv50
, a
, startInstance
);
348 if (!nv50_state_validate(nv50
, overhead
+ 6 + 3))
/* Write startInstance at NV50_CB_AUX | (24 << 8) through NV50TCL_CB_ADDR. */
351 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
352 OUT_RING (chan
, NV50_CB_AUX
| (24 << 8));
353 OUT_RING (chan
, startInstance
);
354 while (instanceCount
--) {
/* Restart primitive splitting for every instance. */
358 u_split_prim_init(&s
, mode
, start
, count
);
360 if (AVAIL_RING(chan
) < (overhead
+ 6)) {
362 if (!nv50_state_validate(nv50
, (overhead
+ 6))) {
/* Vertices that fit in the remaining ring space, limited when above 2047. */
368 max_verts
= AVAIL_RING(chan
) - overhead
;
369 if (max_verts
> 2047)
373 instance_step(nv50
, a
);
/* Bit 28 of the begin word is set when nzi is non-zero. */
375 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
376 OUT_RING (chan
, nv50_prim(s
.mode
) | (nzi
? (1<<28) : 0));
377 done
= u_split_prim_next(&s
, max_verts
);
378 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
385 pipe_buffer_unmap(pscreen
, indexBuffer
);
/*
 * Draw instanceCount instances of an indexed primitive.
 *
 * pipe:          gallium context.
 * indexBuffer:   buffer holding the indices.
 * indexSize:     size of one index in bytes (1, 2 and 4 are handled).
 * mode:          gallium primitive type, converted with nv50_prim().
 * start, count:  index range to draw.
 * startInstance: first instance index.
 * instanceCount: number of instances to draw.
 *
 * Three paths are visible:
 *  - nv50->vbo_fifo set: nv50_push_elements_instanced();
 *  - index buffer without PIPE_BUFFER_USAGE_INDEX, or indexSize == 1:
 *    nv50_draw_elements_inline();
 *  - otherwise the index data is submitted straight from the buffer object
 *    with nouveau_pushbuf_submit().
 *
 * NOTE(review): the return statements ending the first two paths and the
 * error paths are not visible in this listing.
 */
389 nv50_draw_elements_instanced(struct pipe_context
*pipe
,
390 struct pipe_buffer
*indexBuffer
,
392 unsigned mode
, unsigned start
, unsigned count
,
393 unsigned startInstance
, unsigned instanceCount
)
395 struct nv50_context
*nv50
= nv50_context(pipe
);
396 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
397 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
398 struct instance a
[16];
399 unsigned prim
= nv50_prim(mode
);
401 instance_init(nv50
, a
, startInstance
);
402 if (!nv50_state_validate(nv50
, 13 + 16*3))
405 if (nv50
->vbo_fifo
) {
406 nv50_push_elements_instanced(pipe
, indexBuffer
, indexSize
,
407 mode
, start
, count
, startInstance
,
411 if (!(indexBuffer
->usage
& PIPE_BUFFER_USAGE_INDEX
) || indexSize
== 1) {
412 nv50_draw_elements_inline(pipe
, indexBuffer
, indexSize
,
413 mode
, start
, count
, startInstance
,
/* Write startInstance at NV50_CB_AUX | (24 << 8) through NV50TCL_CB_ADDR. */
418 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
419 OUT_RING (chan
, NV50_CB_AUX
| (24 << 8));
420 OUT_RING (chan
, startInstance
);
421 while (instanceCount
--) {
/*
 * NOTE(review): the space check uses 7 + 16*3 words while the
 * revalidation below requests 10 + 16*3 - confirm which is intended.
 */
422 if (AVAIL_RING(chan
) < (7 + 16*3)) {
424 if (!nv50_state_validate(nv50
, 10 + 16*3)) {
429 instance_step(nv50
, a
);
431 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
432 OUT_RING (chan
, prim
);
/* 32 bit indices: count words submitted from byte offset start * 4. */
433 if (indexSize
== 4) {
434 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
| 0x30000, 0);
435 OUT_RING (chan
, count
);
436 nouveau_pushbuf_submit(chan
, nouveau_bo(indexBuffer
),
437 start
<< 2, count
<< 2);
/*
 * 16 bit indices: the submitted range is widened to whole 32 bit words.
 * vb_start rounds start down to an even index, vb_end rounds the end up.
 */
439 if (indexSize
== 2) {
440 unsigned vb_start
= (start
& ~1);
441 unsigned vb_end
= (start
+ count
+ 1) & ~1;
442 unsigned dwords
= (vb_end
- vb_start
) >> 1;
/* Setup word: bit 31 is set when start is odd; the low bits hold count. */
444 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U16_SETUP
, 1);
445 OUT_RING (chan
, ((start
& 1) << 31) | count
);
446 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
| 0x30000, 0);
447 OUT_RING (chan
, dwords
);
448 nouveau_pushbuf_submit(chan
, nouveau_bo(indexBuffer
),
449 vb_start
<< 1, dwords
<< 2);
450 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U16_SETUP
, 1);
453 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
/*
 * Non-instanced indexed draw: forwards to nv50_draw_elements_instanced()
 * with startInstance 0 and instanceCount 1.
 */
461 nv50_draw_elements(struct pipe_context
*pipe
,
462 struct pipe_buffer
*indexBuffer
, unsigned indexSize
,
463 unsigned mode
, unsigned start
, unsigned count
)
465 nv50_draw_elements_instanced(pipe
, indexBuffer
, indexSize
,
466 mode
, start
, count
, 0, 1);
/*
 * Read one attribute value from a vertex buffer on the CPU and record it in
 * a state object as a constant NV50TCL_VTX_ATTR_* value.
 *
 * nv50:   context; vtxelt->num_elements sizes the state object and
 *         vertprog->cfg.edgeflag_in identifies the edge flag attribute.
 * attrib: attribute index used in the VTX_ATTR_* method.
 * pso:    in/out state object pointer; a new object is stored here.
 * ve:     vertex element supplying src_format and src_offset.
 * vb:     vertex buffer supplying the buffer object and buffer_offset.
 *
 * The value is decoded to four floats with util_format_read_4f(); the
 * component count of the format selects the 4F, 3F, 2F or 1F method.
 *
 * NOTE(review): the return values, the condition under which *pso is
 * allocated and the case labels of the switch are not visible in this
 * listing. The two nouveau_bo_unmap() calls presumably belong to a failure
 * path and the normal exit - confirm.
 */
469 static INLINE boolean
470 nv50_vbo_static_attrib(struct nv50_context
*nv50
, unsigned attrib
,
471 struct nouveau_stateobj
**pso
,
472 struct pipe_vertex_element
*ve
,
473 struct pipe_vertex_buffer
*vb
)
476 struct nouveau_stateobj
*so
;
477 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
478 struct nouveau_bo
*bo
= nouveau_bo(vb
->buffer
);
481 unsigned nr_components
= util_format_get_nr_components(ve
->src_format
);
483 ret
= nouveau_bo_map(bo
, NOUVEAU_BO_RD
);
/* Decode the value at buffer_offset + src_offset into v. */
487 util_format_read_4f(ve
->src_format
, v
, 0, (uint8_t *)bo
->map
+
488 (vb
->buffer_offset
+ ve
->src_offset
), 0,
492 *pso
= so
= so_new(nv50
->vtxelt
->num_elements
,
493 nv50
->vtxelt
->num_elements
* 4, 0);
495 switch (nr_components
) {
497 so_method(so
, tesla
, NV50TCL_VTX_ATTR_4F_X(attrib
), 4);
498 so_data (so
, fui(v
[0]));
499 so_data (so
, fui(v
[1]));
500 so_data (so
, fui(v
[2]));
501 so_data (so
, fui(v
[3]));
504 so_method(so
, tesla
, NV50TCL_VTX_ATTR_3F_X(attrib
), 3);
505 so_data (so
, fui(v
[0]));
506 so_data (so
, fui(v
[1]));
507 so_data (so
, fui(v
[2]));
510 so_method(so
, tesla
, NV50TCL_VTX_ATTR_2F_X(attrib
), 2);
511 so_data (so
, fui(v
[0]));
512 so_data (so
, fui(v
[1]));
/* The edge flag attribute additionally drives NV50TCL_EDGEFLAG_ENABLE. */
515 if (attrib
== nv50
->vertprog
->cfg
.edgeflag_in
) {
516 so_method(so
, tesla
, NV50TCL_EDGEFLAG_ENABLE
, 1);
517 so_data (so
, v
[0] ? 1 : 0);
519 so_method(so
, tesla
, NV50TCL_VTX_ATTR_1F(attrib
), 1);
520 so_data (so
, fui(v
[0]));
523 nouveau_bo_unmap(bo
);
527 nouveau_bo_unmap(bo
);
/*
 * Precompute the hardware attribute word of every vertex element in cso:
 * cso->hw[i] receives nv50_vbo_vtxelt_to_hw() of cso->pipe[i].
 */
532 nv50_vtxelt_construct(struct nv50_vtxelt_stateobj
*cso
)
536 for (i
= 0; i
< cso
->num_elements
; ++i
) {
537 struct pipe_vertex_element
*ve
= &cso
->pipe
[i
];
539 cso
->hw
[i
] = nv50_vbo_vtxelt_to_hw(ve
);
/*
 * Build the state objects that program the vertex attribute formats and
 * vertex arrays for the currently bound elements and buffers.
 *
 * nv50: context; reads vtxbuf, vtxbuf_nr, vtxelt and state.vtxelt_nr, and
 *       updates vbo_fifo, state.vtxelt_nr, state.vtxbuf and state.vtxattr.
 *
 * Three state objects are involved: vtxfmt (one attribute word per
 * element), vtxbuf (array format, start address and limit per element) and
 * vtxattr (constant attributes filled in by nv50_vbo_static_attrib()).
 *
 * NOTE(review): the return statements, the initialisation of vbo_fifo and
 * vtxattr, several so_data() words and the first half of the condition
 * around nv50_vbo_static_attrib() are not visible in this listing.
 */
543 struct nouveau_stateobj
*
544 nv50_vbo_validate(struct nv50_context
*nv50
)
546 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
547 struct nouveau_stateobj
*vtxbuf
, *vtxfmt
, *vtxattr
;
550 /* don't validate if Gallium took away our buffers */
551 if (nv50
->vtxbuf_nr
== 0)
/*
 * Select the push path for all elements when the screen forces it or when
 * the vertex program uses the edge flag input.
 */
554 if (nv50
->screen
->force_push
|| NV50_USING_LOATHED_EDGEFLAG(nv50
))
555 nv50
->vbo_fifo
= 0xffff;
/* A strided buffer lacking PIPE_BUFFER_USAGE_VERTEX also selects the push path. */
557 for (i
= 0; i
< nv50
->vtxbuf_nr
; i
++) {
558 if (nv50
->vtxbuf
[i
].stride
&&
559 !(nv50
->vtxbuf
[i
].buffer
->usage
& PIPE_BUFFER_USAGE_VERTEX
))
560 nv50
->vbo_fifo
= 0xffff;
/*
 * Cover the larger of the new and the previously programmed element count
 * so the trailing loop can reprogram arrays that are no longer used.
 */
563 n_ve
= MAX2(nv50
->vtxelt
->num_elements
, nv50
->state
.vtxelt_nr
);
566 vtxbuf
= so_new(n_ve
* 2, n_ve
* 5, nv50
->vtxelt
->num_elements
* 4);
567 vtxfmt
= so_new(1, n_ve
, 0);
568 so_method(vtxfmt
, tesla
, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve
);
570 for (i
= 0; i
< nv50
->vtxelt
->num_elements
; i
++) {
571 struct pipe_vertex_element
*ve
= &nv50
->vtxelt
->pipe
[i
];
572 struct pipe_vertex_buffer
*vb
=
573 &nv50
->vtxbuf
[ve
->vertex_buffer_index
];
574 struct nouveau_bo
*bo
= nouveau_bo(vb
->buffer
);
575 uint32_t hw
= nv50
->vtxelt
->hw
[i
];
/*
 * Constant attribute: the format word gets bit 4 set and the element is
 * removed from the vbo_fifo mask.
 */
578 nv50_vbo_static_attrib(nv50
, i
, &vtxattr
, ve
, vb
)) {
579 so_data(vtxfmt
, hw
| (1 << 4));
581 so_method(vtxbuf
, tesla
,
582 NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
585 nv50
->vbo_fifo
&= ~(1 << i
);
/*
 * Push path: instanced elements get bit 4 in the format word, all others
 * get the element index.
 */
589 if (nv50
->vbo_fifo
) {
590 so_data (vtxfmt
, hw
| (ve
->instance_divisor
? (1 << 4) : i
));
591 so_method(vtxbuf
, tesla
,
592 NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
/* Vertex array path: format word with the element index, then array setup. */
597 so_data(vtxfmt
, hw
| i
);
/* Instanced elements are programmed with a zero stride. */
599 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_FORMAT(i
), 3);
600 so_data (vtxbuf
, 0x20000000 |
601 (ve
->instance_divisor
? 0 : vb
->stride
));
602 so_reloc (vtxbuf
, bo
, vb
->buffer_offset
+
603 ve
->src_offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
604 NOUVEAU_BO_RD
| NOUVEAU_BO_HIGH
, 0, 0);
605 so_reloc (vtxbuf
, bo
, vb
->buffer_offset
+
606 ve
->src_offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
607 NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
, 0, 0);
609 /* vertex array limits */
610 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i
), 2);
611 so_reloc (vtxbuf
, bo
, vb
->buffer
->size
- 1,
612 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
|
613 NOUVEAU_BO_HIGH
, 0, 0);
614 so_reloc (vtxbuf
, bo
, vb
->buffer
->size
- 1,
615 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
|
616 NOUVEAU_BO_LOW
, 0, 0);
/*
 * Elements left over from the previous state get the fixed format word
 * 0x7e080010 and a one-word array format write.
 */
618 for (; i
< n_ve
; ++i
) {
619 so_data (vtxfmt
, 0x7e080010);
621 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
/* Remember the element count for the next validation. */
624 nv50
->state
.vtxelt_nr
= nv50
->vtxelt
->num_elements
;
/* Store the new objects in the context state, then drop the local references. */
626 so_ref (vtxbuf
, &nv50
->state
.vtxbuf
);
627 so_ref (vtxattr
, &nv50
->state
.vtxattr
);
628 so_ref (NULL
, &vtxbuf
);
629 so_ref (NULL
, &vtxattr
);