/*
 * Copyright 2008 Ben Skeggs
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #include "pipe/p_context.h"
24 #include "pipe/p_state.h"
25 #include "pipe/p_inlines.h"
27 #include "util/u_format.h"
29 #include "nv50_context.h"
32 nv50_push_elements_u08(struct nv50_context
*, uint8_t *, unsigned);
35 nv50_push_elements_u16(struct nv50_context
*, uint16_t *, unsigned);
38 nv50_push_elements_u32(struct nv50_context
*, uint32_t *, unsigned);
41 nv50_push_arrays(struct nv50_context
*, unsigned, unsigned);
43 #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
45 static INLINE
unsigned
46 nv50_prim(unsigned mode
)
49 case PIPE_PRIM_POINTS
: return NV50TCL_VERTEX_BEGIN_POINTS
;
50 case PIPE_PRIM_LINES
: return NV50TCL_VERTEX_BEGIN_LINES
;
51 case PIPE_PRIM_LINE_LOOP
: return NV50TCL_VERTEX_BEGIN_LINE_LOOP
;
52 case PIPE_PRIM_LINE_STRIP
: return NV50TCL_VERTEX_BEGIN_LINE_STRIP
;
53 case PIPE_PRIM_TRIANGLES
: return NV50TCL_VERTEX_BEGIN_TRIANGLES
;
54 case PIPE_PRIM_TRIANGLE_STRIP
:
55 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP
;
56 case PIPE_PRIM_TRIANGLE_FAN
: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN
;
57 case PIPE_PRIM_QUADS
: return NV50TCL_VERTEX_BEGIN_QUADS
;
58 case PIPE_PRIM_QUAD_STRIP
: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP
;
59 case PIPE_PRIM_POLYGON
: return NV50TCL_VERTEX_BEGIN_POLYGON
;
60 case PIPE_PRIM_LINES_ADJACENCY
:
61 return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY
;
62 case PIPE_PRIM_LINE_STRIP_ADJACENCY
:
63 return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY
;
64 case PIPE_PRIM_TRIANGLES_ADJACENCY
:
65 return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY
;
66 case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
:
67 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY
;
72 NOUVEAU_ERR("invalid primitive type %d\n", mode
);
73 return NV50TCL_VERTEX_BEGIN_POINTS
;
76 static INLINE
uint32_t
77 nv50_vbo_type_to_hw(enum pipe_format format
)
79 const struct util_format_description
*desc
;
81 desc
= util_format_description(format
);
84 switch (desc
->channel
[0].type
) {
85 case UTIL_FORMAT_TYPE_FLOAT
:
86 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT
;
87 case UTIL_FORMAT_TYPE_UNSIGNED
:
88 if (desc
->channel
[0].normalized
) {
89 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM
;
91 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED
;
92 case UTIL_FORMAT_TYPE_SIGNED
:
93 if (desc
->channel
[0].normalized
) {
94 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM
;
96 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED
;
98 case PIPE_FORMAT_TYPE_UINT:
99 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
100 case PIPE_FORMAT_TYPE_SINT:
101 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
107 static INLINE
uint32_t
108 nv50_vbo_size_to_hw(unsigned size
, unsigned nr_c
)
110 static const uint32_t hw_values
[] = {
112 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8
,
113 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8
,
114 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8
,
115 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8
,
116 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16
,
117 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16
,
118 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16
,
119 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16
,
121 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32
,
122 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32
,
123 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32
,
124 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32
};
126 /* we'd also have R11G11B10 and R10G10B10A2 */
128 assert(nr_c
> 0 && nr_c
<= 4);
134 return hw_values
[size
+ (nr_c
- 1)];
137 static INLINE
uint32_t
138 nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element
*ve
)
140 uint32_t hw_type
, hw_size
;
141 enum pipe_format pf
= ve
->src_format
;
142 const struct util_format_description
*desc
;
145 desc
= util_format_description(pf
);
148 size
= util_format_get_component_bits(pf
, UTIL_FORMAT_COLORSPACE_RGB
, 0);
150 hw_type
= nv50_vbo_type_to_hw(pf
);
151 hw_size
= nv50_vbo_size_to_hw(size
, ve
->nr_components
);
153 if (!hw_type
|| !hw_size
) {
154 NOUVEAU_ERR("unsupported vbo format: %s\n", pf_name(pf
));
159 if (desc
->swizzle
[0] == UTIL_FORMAT_SWIZZLE_Z
) /* BGRA */
160 hw_size
|= (1 << 31); /* no real swizzle bits :-( */
162 return (hw_type
| hw_size
);
165 /* For instanced drawing from user buffers, hitting the FIFO repeatedly
166 * with the same vertex data is probably worse than uploading all data.
169 nv50_upload_vtxbuf(struct nv50_context
*nv50
, unsigned i
)
171 struct nv50_screen
*nscreen
= nv50
->screen
;
172 struct pipe_screen
*pscreen
= &nscreen
->base
.base
;
173 struct pipe_buffer
*buf
= nscreen
->strm_vbuf
[i
];
174 struct pipe_vertex_buffer
*vb
= &nv50
->vtxbuf
[i
];
176 unsigned size
= align(vb
->buffer
->size
, 4096);
178 if (buf
&& buf
->size
< size
)
179 pipe_buffer_reference(&nscreen
->strm_vbuf
[i
], NULL
);
181 if (!nscreen
->strm_vbuf
[i
]) {
182 nscreen
->strm_vbuf
[i
] = pipe_buffer_create(
183 pscreen
, 0, PIPE_BUFFER_USAGE_VERTEX
, size
);
184 buf
= nscreen
->strm_vbuf
[i
];
187 src
= pipe_buffer_map(pscreen
, vb
->buffer
, PIPE_BUFFER_USAGE_CPU_READ
);
190 src
+= vb
->buffer_offset
;
192 size
= (vb
->max_index
+ 1) * vb
->stride
+ 16; /* + 16 is for stride 0 */
193 if (vb
->buffer_offset
+ size
> vb
->buffer
->size
)
194 size
= vb
->buffer
->size
- vb
->buffer_offset
;
196 pipe_buffer_write(pscreen
, buf
, vb
->buffer_offset
, size
, src
);
197 pipe_buffer_unmap(pscreen
, vb
->buffer
);
199 vb
->buffer
= buf
; /* don't pipe_reference, this is a private copy */
204 nv50_upload_user_vbufs(struct nv50_context
*nv50
)
209 nv50
->dirty
|= NV50_NEW_ARRAYS
;
210 if (!(nv50
->dirty
& NV50_NEW_ARRAYS
))
213 for (i
= 0; i
< nv50
->vtxbuf_nr
; ++i
) {
214 if (nv50
->vtxbuf
[i
].buffer
->usage
& PIPE_BUFFER_USAGE_VERTEX
)
216 nv50_upload_vtxbuf(nv50
, i
);
221 nv50_set_static_vtxattr(struct nv50_context
*nv50
, unsigned i
, void *data
)
223 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
224 struct nouveau_channel
*chan
= tesla
->channel
;
227 util_format_read_4f(nv50
->vtxelt
[i
].src_format
,
228 v
, 0, data
, 0, 0, 0, 1, 1);
230 switch (nv50
->vtxelt
[i
].nr_components
) {
232 BEGIN_RING(chan
, tesla
, NV50TCL_VTX_ATTR_4F_X(i
), 4);
233 OUT_RINGf (chan
, v
[0]);
234 OUT_RINGf (chan
, v
[1]);
235 OUT_RINGf (chan
, v
[2]);
236 OUT_RINGf (chan
, v
[3]);
239 BEGIN_RING(chan
, tesla
, NV50TCL_VTX_ATTR_3F_X(i
), 3);
240 OUT_RINGf (chan
, v
[0]);
241 OUT_RINGf (chan
, v
[1]);
242 OUT_RINGf (chan
, v
[2]);
245 BEGIN_RING(chan
, tesla
, NV50TCL_VTX_ATTR_2F_X(i
), 2);
246 OUT_RINGf (chan
, v
[0]);
247 OUT_RINGf (chan
, v
[1]);
250 BEGIN_RING(chan
, tesla
, NV50TCL_VTX_ATTR_1F(i
), 1);
251 OUT_RINGf (chan
, v
[0]);
260 init_per_instance_arrays_immd(struct nv50_context
*nv50
,
261 unsigned startInstance
,
262 unsigned pos
[16], unsigned step
[16])
264 struct nouveau_bo
*bo
;
265 unsigned i
, b
, count
= 0;
267 for (i
= 0; i
< nv50
->vtxelt_nr
; ++i
) {
268 if (!nv50
->vtxelt
[i
].instance_divisor
)
271 b
= nv50
->vtxelt
[i
].vertex_buffer_index
;
273 pos
[i
] = nv50
->vtxelt
[i
].src_offset
+
274 nv50
->vtxbuf
[b
].buffer_offset
+
275 startInstance
* nv50
->vtxbuf
[b
].stride
;
276 step
[i
] = startInstance
% nv50
->vtxelt
[i
].instance_divisor
;
278 bo
= nouveau_bo(nv50
->vtxbuf
[b
].buffer
);
280 nouveau_bo_map(bo
, NOUVEAU_BO_RD
);
282 nv50_set_static_vtxattr(nv50
, i
, (uint8_t *)bo
->map
+ pos
[i
]);
289 init_per_instance_arrays(struct nv50_context
*nv50
,
290 unsigned startInstance
,
291 unsigned pos
[16], unsigned step
[16])
293 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
294 struct nouveau_channel
*chan
= tesla
->channel
;
295 struct nouveau_bo
*bo
;
296 struct nouveau_stateobj
*so
;
297 unsigned i
, b
, count
= 0;
298 const uint32_t rl
= NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
;
301 return init_per_instance_arrays_immd(nv50
, startInstance
,
304 so
= so_new(nv50
->vtxelt_nr
, nv50
->vtxelt_nr
* 2, nv50
->vtxelt_nr
* 2);
306 for (i
= 0; i
< nv50
->vtxelt_nr
; ++i
) {
307 if (!nv50
->vtxelt
[i
].instance_divisor
)
310 b
= nv50
->vtxelt
[i
].vertex_buffer_index
;
312 pos
[i
] = nv50
->vtxelt
[i
].src_offset
+
313 nv50
->vtxbuf
[b
].buffer_offset
+
314 startInstance
* nv50
->vtxbuf
[b
].stride
;
316 if (!startInstance
) {
320 step
[i
] = startInstance
% nv50
->vtxelt
[i
].instance_divisor
;
322 bo
= nouveau_bo(nv50
->vtxbuf
[b
].buffer
);
324 so_method(so
, tesla
, NV50TCL_VERTEX_ARRAY_START_HIGH(i
), 2);
325 so_reloc (so
, bo
, pos
[i
], rl
| NOUVEAU_BO_HIGH
, 0, 0);
326 so_reloc (so
, bo
, pos
[i
], rl
| NOUVEAU_BO_LOW
, 0, 0);
329 if (count
&& startInstance
) {
330 so_ref (so
, &nv50
->state
.instbuf
); /* for flush notify */
331 so_emit(chan
, nv50
->state
.instbuf
);
339 step_per_instance_arrays_immd(struct nv50_context
*nv50
,
340 unsigned pos
[16], unsigned step
[16])
342 struct nouveau_bo
*bo
;
345 for (i
= 0; i
< nv50
->vtxelt_nr
; ++i
) {
346 if (!nv50
->vtxelt
[i
].instance_divisor
)
348 if (++step
[i
] != nv50
->vtxelt
[i
].instance_divisor
)
350 b
= nv50
->vtxelt
[i
].vertex_buffer_index
;
351 bo
= nouveau_bo(nv50
->vtxbuf
[b
].buffer
);
354 pos
[i
] += nv50
->vtxbuf
[b
].stride
;
356 nv50_set_static_vtxattr(nv50
, i
, (uint8_t *)bo
->map
+ pos
[i
]);
361 step_per_instance_arrays(struct nv50_context
*nv50
,
362 unsigned pos
[16], unsigned step
[16])
364 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
365 struct nouveau_channel
*chan
= tesla
->channel
;
366 struct nouveau_bo
*bo
;
367 struct nouveau_stateobj
*so
;
369 const uint32_t rl
= NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
;
371 if (nv50
->vbo_fifo
) {
372 step_per_instance_arrays_immd(nv50
, pos
, step
);
376 so
= so_new(nv50
->vtxelt_nr
, nv50
->vtxelt_nr
* 2, nv50
->vtxelt_nr
* 2);
378 for (i
= 0; i
< nv50
->vtxelt_nr
; ++i
) {
379 if (!nv50
->vtxelt
[i
].instance_divisor
)
381 b
= nv50
->vtxelt
[i
].vertex_buffer_index
;
383 if (++step
[i
] == nv50
->vtxelt
[i
].instance_divisor
) {
385 pos
[i
] += nv50
->vtxbuf
[b
].stride
;
388 bo
= nouveau_bo(nv50
->vtxbuf
[b
].buffer
);
390 so_method(so
, tesla
, NV50TCL_VERTEX_ARRAY_START_HIGH(i
), 2);
391 so_reloc (so
, bo
, pos
[i
], rl
| NOUVEAU_BO_HIGH
, 0, 0);
392 so_reloc (so
, bo
, pos
[i
], rl
| NOUVEAU_BO_LOW
, 0, 0);
395 so_ref (so
, &nv50
->state
.instbuf
); /* for flush notify */
398 so_emit(chan
, nv50
->state
.instbuf
);
402 nv50_unmap_vbufs(struct nv50_context
*nv50
)
406 for (i
= 0; i
< nv50
->vtxbuf_nr
; ++i
)
407 if (nouveau_bo(nv50
->vtxbuf
[i
].buffer
)->map
)
408 nouveau_bo_unmap(nouveau_bo(nv50
->vtxbuf
[i
].buffer
));
412 nv50_draw_arrays_instanced(struct pipe_context
*pipe
,
413 unsigned mode
, unsigned start
, unsigned count
,
414 unsigned startInstance
, unsigned instanceCount
)
416 struct nv50_context
*nv50
= nv50_context(pipe
);
417 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
418 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
419 unsigned i
, nz_divisors
;
420 unsigned step
[16], pos
[16];
422 if (!NV50_USING_LOATHED_EDGEFLAG(nv50
))
423 nv50_upload_user_vbufs(nv50
);
425 nv50_state_validate(nv50
);
427 nz_divisors
= init_per_instance_arrays(nv50
, startInstance
, pos
, step
);
429 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
430 OUT_RING (chan
, NV50_CB_AUX
| (24 << 8));
431 OUT_RING (chan
, startInstance
);
433 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
434 OUT_RING (chan
, nv50_prim(mode
));
437 nv50_push_arrays(nv50
, start
, count
);
439 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BUFFER_FIRST
, 2);
440 OUT_RING (chan
, start
);
441 OUT_RING (chan
, count
);
443 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
446 for (i
= 1; i
< instanceCount
; i
++) {
447 if (nz_divisors
) /* any non-zero array divisors ? */
448 step_per_instance_arrays(nv50
, pos
, step
);
450 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
451 OUT_RING (chan
, nv50_prim(mode
) | (1 << 28));
454 nv50_push_arrays(nv50
, start
, count
);
456 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BUFFER_FIRST
, 2);
457 OUT_RING (chan
, start
);
458 OUT_RING (chan
, count
);
460 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
463 nv50_unmap_vbufs(nv50
);
465 so_ref(NULL
, &nv50
->state
.instbuf
);
469 nv50_draw_arrays(struct pipe_context
*pipe
, unsigned mode
, unsigned start
,
472 struct nv50_context
*nv50
= nv50_context(pipe
);
473 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
474 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
477 nv50_state_validate(nv50
);
479 BEGIN_RING(chan
, tesla
, 0x142c, 1);
481 BEGIN_RING(chan
, tesla
, 0x142c, 1);
484 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
485 OUT_RING (chan
, nv50_prim(mode
));
488 ret
= nv50_push_arrays(nv50
, start
, count
);
490 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BUFFER_FIRST
, 2);
491 OUT_RING (chan
, start
);
492 OUT_RING (chan
, count
);
495 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
498 nv50_unmap_vbufs(nv50
);
500 /* XXX: not sure what to do if ret != TRUE: flush and retry?
505 static INLINE boolean
506 nv50_draw_elements_inline_u08(struct nv50_context
*nv50
, uint8_t *map
,
507 unsigned start
, unsigned count
)
509 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
510 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
515 return nv50_push_elements_u08(nv50
, map
, count
);
518 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, 1);
519 OUT_RING (chan
, map
[0]);
525 unsigned nr
= count
> 2046 ? 2046 : count
;
528 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
, nr
>> 1);
529 for (i
= 0; i
< nr
; i
+= 2)
530 OUT_RING (chan
, (map
[i
+ 1] << 16) | map
[i
]);
538 static INLINE boolean
539 nv50_draw_elements_inline_u16(struct nv50_context
*nv50
, uint16_t *map
,
540 unsigned start
, unsigned count
)
542 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
543 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
548 return nv50_push_elements_u16(nv50
, map
, count
);
551 BEGIN_RING(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, 1);
552 OUT_RING (chan
, map
[0]);
558 unsigned nr
= count
> 2046 ? 2046 : count
;
561 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U16
, nr
>> 1);
562 for (i
= 0; i
< nr
; i
+= 2)
563 OUT_RING (chan
, (map
[i
+ 1] << 16) | map
[i
]);
571 static INLINE boolean
572 nv50_draw_elements_inline_u32(struct nv50_context
*nv50
, uint32_t *map
,
573 unsigned start
, unsigned count
)
575 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
576 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
581 return nv50_push_elements_u32(nv50
, map
, count
);
584 unsigned nr
= count
> 2047 ? 2047 : count
;
586 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VB_ELEMENT_U32
, nr
);
587 OUT_RINGp (chan
, map
, nr
);
596 nv50_draw_elements_inline(struct nv50_context
*nv50
,
597 void *map
, unsigned indexSize
,
598 unsigned start
, unsigned count
)
602 nv50_draw_elements_inline_u08(nv50
, map
, start
, count
);
605 nv50_draw_elements_inline_u16(nv50
, map
, start
, count
);
608 nv50_draw_elements_inline_u32(nv50
, map
, start
, count
);
614 nv50_draw_elements_instanced(struct pipe_context
*pipe
,
615 struct pipe_buffer
*indexBuffer
,
617 unsigned mode
, unsigned start
, unsigned count
,
618 unsigned startInstance
, unsigned instanceCount
)
620 struct nv50_context
*nv50
= nv50_context(pipe
);
621 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
622 struct nouveau_channel
*chan
= tesla
->channel
;
623 struct pipe_screen
*pscreen
= pipe
->screen
;
625 unsigned i
, nz_divisors
;
626 unsigned step
[16], pos
[16];
628 map
= pipe_buffer_map(pscreen
, indexBuffer
, PIPE_BUFFER_USAGE_CPU_READ
);
630 if (!NV50_USING_LOATHED_EDGEFLAG(nv50
))
631 nv50_upload_user_vbufs(nv50
);
633 nv50_state_validate(nv50
);
635 nz_divisors
= init_per_instance_arrays(nv50
, startInstance
, pos
, step
);
637 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 2);
638 OUT_RING (chan
, NV50_CB_AUX
| (24 << 8));
639 OUT_RING (chan
, startInstance
);
641 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
642 OUT_RING (chan
, nv50_prim(mode
));
644 nv50_draw_elements_inline(nv50
, map
, indexSize
, start
, count
);
646 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
649 for (i
= 1; i
< instanceCount
; ++i
) {
650 if (nz_divisors
) /* any non-zero array divisors ? */
651 step_per_instance_arrays(nv50
, pos
, step
);
653 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
654 OUT_RING (chan
, nv50_prim(mode
) | (1 << 28));
656 nv50_draw_elements_inline(nv50
, map
, indexSize
, start
, count
);
658 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
661 nv50_unmap_vbufs(nv50
);
663 so_ref(NULL
, &nv50
->state
.instbuf
);
667 nv50_draw_elements(struct pipe_context
*pipe
,
668 struct pipe_buffer
*indexBuffer
, unsigned indexSize
,
669 unsigned mode
, unsigned start
, unsigned count
)
671 struct nv50_context
*nv50
= nv50_context(pipe
);
672 struct nouveau_channel
*chan
= nv50
->screen
->tesla
->channel
;
673 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
674 struct pipe_screen
*pscreen
= pipe
->screen
;
677 map
= pipe_buffer_map(pscreen
, indexBuffer
, PIPE_BUFFER_USAGE_CPU_READ
);
679 nv50_state_validate(nv50
);
681 BEGIN_RING(chan
, tesla
, 0x142c, 1);
683 BEGIN_RING(chan
, tesla
, 0x142c, 1);
686 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_BEGIN
, 1);
687 OUT_RING (chan
, nv50_prim(mode
));
689 nv50_draw_elements_inline(nv50
, map
, indexSize
, start
, count
);
691 BEGIN_RING(chan
, tesla
, NV50TCL_VERTEX_END
, 1);
694 nv50_unmap_vbufs(nv50
);
696 pipe_buffer_unmap(pscreen
, indexBuffer
);
699 static INLINE boolean
700 nv50_vbo_static_attrib(struct nv50_context
*nv50
, unsigned attrib
,
701 struct nouveau_stateobj
**pso
,
702 struct pipe_vertex_element
*ve
,
703 struct pipe_vertex_buffer
*vb
)
706 struct nouveau_stateobj
*so
;
707 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
708 struct nouveau_bo
*bo
= nouveau_bo(vb
->buffer
);
712 ret
= nouveau_bo_map(bo
, NOUVEAU_BO_RD
);
716 util_format_read_4f(ve
->src_format
, v
, 0, (uint8_t *)bo
->map
+
717 (vb
->buffer_offset
+ ve
->src_offset
), 0,
721 *pso
= so
= so_new(nv50
->vtxelt_nr
, nv50
->vtxelt_nr
* 4, 0);
723 switch (ve
->nr_components
) {
725 so_method(so
, tesla
, NV50TCL_VTX_ATTR_4F_X(attrib
), 4);
726 so_data (so
, fui(v
[0]));
727 so_data (so
, fui(v
[1]));
728 so_data (so
, fui(v
[2]));
729 so_data (so
, fui(v
[3]));
732 so_method(so
, tesla
, NV50TCL_VTX_ATTR_3F_X(attrib
), 3);
733 so_data (so
, fui(v
[0]));
734 so_data (so
, fui(v
[1]));
735 so_data (so
, fui(v
[2]));
738 so_method(so
, tesla
, NV50TCL_VTX_ATTR_2F_X(attrib
), 2);
739 so_data (so
, fui(v
[0]));
740 so_data (so
, fui(v
[1]));
743 if (attrib
== nv50
->vertprog
->cfg
.edgeflag_in
) {
744 so_method(so
, tesla
, NV50TCL_EDGEFLAG_ENABLE
, 1);
745 so_data (so
, v
[0] ? 1 : 0);
747 so_method(so
, tesla
, NV50TCL_VTX_ATTR_1F(attrib
), 1);
748 so_data (so
, fui(v
[0]));
751 nouveau_bo_unmap(bo
);
755 nouveau_bo_unmap(bo
);
760 nv50_vbo_validate(struct nv50_context
*nv50
)
762 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
763 struct nouveau_stateobj
*vtxbuf
, *vtxfmt
, *vtxattr
;
766 /* don't validate if Gallium took away our buffers */
767 if (nv50
->vtxbuf_nr
== 0)
771 for (i
= 0; i
< nv50
->vtxbuf_nr
; ++i
)
772 if (nv50
->vtxbuf
[i
].stride
&&
773 !(nv50
->vtxbuf
[i
].buffer
->usage
& PIPE_BUFFER_USAGE_VERTEX
))
774 nv50
->vbo_fifo
= 0xffff;
776 if (NV50_USING_LOATHED_EDGEFLAG(nv50
))
777 nv50
->vbo_fifo
= 0xffff; /* vertprog can't set edgeflag */
779 n_ve
= MAX2(nv50
->vtxelt_nr
, nv50
->state
.vtxelt_nr
);
782 vtxbuf
= so_new(n_ve
* 2, n_ve
* 5, nv50
->vtxelt_nr
* 4);
783 vtxfmt
= so_new(1, n_ve
, 0);
784 so_method(vtxfmt
, tesla
, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve
);
786 for (i
= 0; i
< nv50
->vtxelt_nr
; i
++) {
787 struct pipe_vertex_element
*ve
= &nv50
->vtxelt
[i
];
788 struct pipe_vertex_buffer
*vb
=
789 &nv50
->vtxbuf
[ve
->vertex_buffer_index
];
790 struct nouveau_bo
*bo
= nouveau_bo(vb
->buffer
);
791 uint32_t hw
= nv50_vbo_vtxelt_to_hw(ve
);
794 nv50_vbo_static_attrib(nv50
, i
, &vtxattr
, ve
, vb
)) {
795 so_data(vtxfmt
, hw
| (1 << 4));
797 so_method(vtxbuf
, tesla
,
798 NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
801 nv50
->vbo_fifo
&= ~(1 << i
);
805 if (nv50
->vbo_fifo
) {
806 so_data (vtxfmt
, hw
|
807 (ve
->instance_divisor
? (1 << 4) : i
));
808 so_method(vtxbuf
, tesla
,
809 NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
813 so_data(vtxfmt
, hw
| i
);
815 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_FORMAT(i
), 3);
816 so_data (vtxbuf
, 0x20000000 |
817 (ve
->instance_divisor
? 0 : vb
->stride
));
818 so_reloc (vtxbuf
, bo
, vb
->buffer_offset
+
819 ve
->src_offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
820 NOUVEAU_BO_RD
| NOUVEAU_BO_HIGH
, 0, 0);
821 so_reloc (vtxbuf
, bo
, vb
->buffer_offset
+
822 ve
->src_offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
823 NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
, 0, 0);
825 /* vertex array limits */
826 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i
), 2);
827 so_reloc (vtxbuf
, bo
, vb
->buffer
->size
- 1,
828 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
|
829 NOUVEAU_BO_HIGH
, 0, 0);
830 so_reloc (vtxbuf
, bo
, vb
->buffer
->size
- 1,
831 NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
| NOUVEAU_BO_RD
|
832 NOUVEAU_BO_LOW
, 0, 0);
834 for (; i
< n_ve
; ++i
) {
835 so_data (vtxfmt
, 0x7e080010);
837 so_method(vtxbuf
, tesla
, NV50TCL_VERTEX_ARRAY_FORMAT(i
), 1);
840 nv50
->state
.vtxelt_nr
= nv50
->vtxelt_nr
;
842 so_ref (vtxfmt
, &nv50
->state
.vtxfmt
);
843 so_ref (vtxbuf
, &nv50
->state
.vtxbuf
);
844 so_ref (vtxattr
, &nv50
->state
.vtxattr
);
845 so_ref (NULL
, &vtxbuf
);
846 so_ref (NULL
, &vtxfmt
);
847 so_ref (NULL
, &vtxattr
);
/* Emits one attribute's data for a single vertex into the FIFO. */
typedef void (*pfn_push)(struct nouveau_channel *, void *);

/* State for pushing vertex data through the FIFO (vbo_fifo path).
 * NOTE(review): field list reconstructed from the uses visible in this
 * file (emit_prepare/emit_vtx/set_edgeflag) — confirm against upstream.
 */
struct nv50_vbo_emitctx
{
        pfn_push push[16];      /* per-element emit callback */
        uint8_t *map[16];       /* per-element CPU pointer to current vertex */
        unsigned stride[16];    /* per-element byte stride */
        unsigned nr_ve;         /* number of elements actually pushed */
        unsigned vtx_dwords;    /* FIFO dwords per vertex */
        unsigned vtx_max;       /* max vertices per VERTEX_DATA packet */

        float edgeflag;         /* last edgeflag value sent */
        unsigned ve_edgeflag;   /* element index carrying the edgeflag, or >= 16 */
};
866 emit_vtx_next(struct nouveau_channel
*chan
, struct nv50_vbo_emitctx
*emit
)
870 for (i
= 0; i
< emit
->nr_ve
; ++i
) {
871 emit
->push
[i
](chan
, emit
->map
[i
]);
872 emit
->map
[i
] += emit
->stride
[i
];
877 emit_vtx(struct nouveau_channel
*chan
, struct nv50_vbo_emitctx
*emit
,
882 for (i
= 0; i
< emit
->nr_ve
; ++i
)
883 emit
->push
[i
](chan
, emit
->map
[i
] + emit
->stride
[i
] * vi
);
886 static INLINE boolean
887 nv50_map_vbufs(struct nv50_context
*nv50
)
891 for (i
= 0; i
< nv50
->vtxbuf_nr
; ++i
) {
892 struct pipe_vertex_buffer
*vb
= &nv50
->vtxbuf
[i
];
893 unsigned size
= vb
->stride
* (vb
->max_index
+ 1) + 16;
895 if (nouveau_bo(vb
->buffer
)->map
)
898 size
= vb
->stride
* (vb
->max_index
+ 1) + 16;
899 size
= MIN2(size
, vb
->buffer
->size
);
901 size
= vb
->buffer
->size
;
903 if (nouveau_bo_map_range(nouveau_bo(vb
->buffer
),
904 0, size
, NOUVEAU_BO_RD
))
908 if (i
== nv50
->vtxbuf_nr
)
911 nouveau_bo_unmap(nouveau_bo(nv50
->vtxbuf
[i
].buffer
));
/* Push one 32-bit component. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
        uint32_t *v = data;

        OUT_RING(chan, v[0]);
}
/* Push two 32-bit components. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
        uint32_t *v = data;

        OUT_RING(chan, v[0]);
        OUT_RING(chan, v[1]);
}
/* Push three 32-bit components. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
        uint32_t *v = data;

        OUT_RING(chan, v[0]);
        OUT_RING(chan, v[1]);
        OUT_RING(chan, v[2]);
}
/* Push four 32-bit components. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
        uint32_t *v = data;

        OUT_RING(chan, v[0]);
        OUT_RING(chan, v[1]);
        OUT_RING(chan, v[2]);
        OUT_RING(chan, v[3]);
}
/* Push one 16-bit component (upper half of the dword is zero). */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
        uint16_t *v = data;

        OUT_RING(chan, v[0]);
}
/* Push three 16-bit components packed into two dwords. */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
        uint16_t *v = data;

        OUT_RING(chan, (v[1] << 16) | v[0]);
        OUT_RING(chan, v[2]);
}
/* Push one 8-bit component (upper bits of the dword are zero). */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
        uint8_t *v = data;

        OUT_RING(chan, v[0]);
}
/* Push three 8-bit components packed into one dword. */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
        uint8_t *v = data;

        OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
}
987 emit_prepare(struct nv50_context
*nv50
, struct nv50_vbo_emitctx
*emit
,
992 if (nv50_map_vbufs(nv50
) == FALSE
)
995 emit
->ve_edgeflag
= nv50
->vertprog
->cfg
.edgeflag_in
;
997 emit
->edgeflag
= 0.5f
;
999 emit
->vtx_dwords
= 0;
1001 for (i
= 0; i
< nv50
->vtxelt_nr
; ++i
) {
1002 struct pipe_vertex_element
*ve
;
1003 struct pipe_vertex_buffer
*vb
;
1005 const struct util_format_description
*desc
;
1007 ve
= &nv50
->vtxelt
[i
];
1008 vb
= &nv50
->vtxbuf
[ve
->vertex_buffer_index
];
1009 if (!(nv50
->vbo_fifo
& (1 << i
)) || ve
->instance_divisor
)
1013 emit
->stride
[n
] = vb
->stride
;
1014 emit
->map
[n
] = (uint8_t *)nouveau_bo(vb
->buffer
)->map
+
1016 (start
* vb
->stride
+ ve
->src_offset
);
1018 desc
= util_format_description(ve
->src_format
);
1021 size
= util_format_get_component_bits(
1022 ve
->src_format
, UTIL_FORMAT_COLORSPACE_RGB
, 0);
1024 assert(ve
->nr_components
> 0 && ve
->nr_components
<= 4);
1026 /* It shouldn't be necessary to push the implicit 1s
1027 * for case 3 and size 8 cases 1, 2, 3.
1031 NOUVEAU_ERR("unsupported vtxelt size: %u\n", size
);
1034 switch (ve
->nr_components
) {
1035 case 1: emit
->push
[n
] = emit_b32_1
; break;
1036 case 2: emit
->push
[n
] = emit_b32_2
; break;
1037 case 3: emit
->push
[n
] = emit_b32_3
; break;
1038 case 4: emit
->push
[n
] = emit_b32_4
; break;
1040 emit
->vtx_dwords
+= ve
->nr_components
;
1043 switch (ve
->nr_components
) {
1044 case 1: emit
->push
[n
] = emit_b16_1
; break;
1045 case 2: emit
->push
[n
] = emit_b32_1
; break;
1046 case 3: emit
->push
[n
] = emit_b16_3
; break;
1047 case 4: emit
->push
[n
] = emit_b32_2
; break;
1049 emit
->vtx_dwords
+= (ve
->nr_components
+ 1) >> 1;
1052 switch (ve
->nr_components
) {
1053 case 1: emit
->push
[n
] = emit_b08_1
; break;
1054 case 2: emit
->push
[n
] = emit_b16_1
; break;
1055 case 3: emit
->push
[n
] = emit_b08_3
; break;
1056 case 4: emit
->push
[n
] = emit_b32_1
; break;
1058 emit
->vtx_dwords
+= 1;
1063 emit
->vtx_max
= 512 / emit
->vtx_dwords
;
1064 if (emit
->ve_edgeflag
< 16)
1071 set_edgeflag(struct nouveau_channel
*chan
,
1072 struct nouveau_grobj
*tesla
,
1073 struct nv50_vbo_emitctx
*emit
, uint32_t index
)
1075 unsigned i
= emit
->ve_edgeflag
;
1078 float f
= *((float *)(emit
->map
[i
] + index
* emit
->stride
[i
]));
1080 if (emit
->edgeflag
!= f
) {
1083 BEGIN_RING(chan
, tesla
, 0x15e4, 1);
1084 OUT_RING (chan
, f
? 1 : 0);
1090 nv50_push_arrays(struct nv50_context
*nv50
, unsigned start
, unsigned count
)
1092 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
1093 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1094 struct nv50_vbo_emitctx emit
;
1096 if (emit_prepare(nv50
, &emit
, start
) == FALSE
)
1100 unsigned i
, dw
, nr
= MIN2(count
, emit
.vtx_max
);
1101 dw
= nr
* emit
.vtx_dwords
;
1103 set_edgeflag(chan
, tesla
, &emit
, 0); /* nr will be 1 */
1105 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VERTEX_DATA
, dw
);
1106 for (i
= 0; i
< nr
; ++i
)
1107 emit_vtx_next(chan
, &emit
);
1116 nv50_push_elements_u32(struct nv50_context
*nv50
, uint32_t *map
, unsigned count
)
1118 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
1119 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1120 struct nv50_vbo_emitctx emit
;
1122 if (emit_prepare(nv50
, &emit
, 0) == FALSE
)
1126 unsigned i
, dw
, nr
= MIN2(count
, emit
.vtx_max
);
1127 dw
= nr
* emit
.vtx_dwords
;
1129 set_edgeflag(chan
, tesla
, &emit
, *map
);
1131 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VERTEX_DATA
, dw
);
1132 for (i
= 0; i
< nr
; ++i
)
1133 emit_vtx(chan
, &emit
, *map
++);
1142 nv50_push_elements_u16(struct nv50_context
*nv50
, uint16_t *map
, unsigned count
)
1144 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
1145 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1146 struct nv50_vbo_emitctx emit
;
1148 if (emit_prepare(nv50
, &emit
, 0) == FALSE
)
1152 unsigned i
, dw
, nr
= MIN2(count
, emit
.vtx_max
);
1153 dw
= nr
* emit
.vtx_dwords
;
1155 set_edgeflag(chan
, tesla
, &emit
, *map
);
1157 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VERTEX_DATA
, dw
);
1158 for (i
= 0; i
< nr
; ++i
)
1159 emit_vtx(chan
, &emit
, *map
++);
1168 nv50_push_elements_u08(struct nv50_context
*nv50
, uint8_t *map
, unsigned count
)
1170 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
1171 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1172 struct nv50_vbo_emitctx emit
;
1174 if (emit_prepare(nv50
, &emit
, 0) == FALSE
)
1178 unsigned i
, dw
, nr
= MIN2(count
, emit
.vtx_max
);
1179 dw
= nr
* emit
.vtx_dwords
;
1181 set_edgeflag(chan
, tesla
, &emit
, *map
);
1183 BEGIN_RING_NI(chan
, tesla
, NV50TCL_VERTEX_DATA
, dw
);
1184 for (i
= 0; i
< nr
; ++i
)
1185 emit_vtx(chan
, &emit
, *map
++);