nv40: fix inline u08/u16 indices
[mesa.git] / src/mesa/pipe/nv40/nv40_vbo.c
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_util.h"

#include "nv40_context.h"
#include "nv40_state.h"

#include "pipe/nouveau/nouveau_channel.h"
#include "pipe/nouveau/nouveau_pushbuf.h"

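/* Number of components present in a vertex format. */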
static INLINE int
nv40_vbo_ncomp(uint format)
{
        int ncomp = 0;

        if (pf_size_x(format)) ncomp++;
        if (pf_size_y(format)) ncomp++;
        if (pf_size_z(format)) ncomp++;
        if (pf_size_w(format)) ncomp++;

        return ncomp;
}

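/* Map a pipe format type onto the corresponding hardware vertex format type. */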
static INLINE int
nv40_vbo_type(uint format)
{
        switch (pf_type(format)) {
        case PIPE_FORMAT_TYPE_FLOAT:
                return NV40TCL_VTXFMT_TYPE_FLOAT;
        case PIPE_FORMAT_TYPE_UNORM:
                return NV40TCL_VTXFMT_TYPE_UBYTE;
        default:
                assert(0);
                return NV40TCL_VTXFMT_TYPE_FLOAT;
        }
}

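/* A vertex buffer with zero pitch means the attribute doesn't advance per
 * vertex, so upload the value once with NV40TCL_VTX_ATTR_4F instead of
 * pointing the hardware at an array.  Returns FALSE if the format isn't
 * handled here and the caller must fall back to the array path.
 */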
static boolean
nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib,
                       struct pipe_vertex_element *ve,
                       struct pipe_vertex_buffer *vb)
{
        struct pipe_winsys *ws = nv40->pipe.winsys;
        int type, ncomp;
        void *map;

        type = nv40_vbo_type(ve->src_format);
        ncomp = nv40_vbo_ncomp(ve->src_format);

        map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
        map += vb->buffer_offset + ve->src_offset;

        switch (type) {
        case NV40TCL_VTXFMT_TYPE_FLOAT:
        {
                float *v = map;

                BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4);
                switch (ncomp) {
                case 4:
                        OUT_RINGf(v[0]);
                        OUT_RINGf(v[1]);
                        OUT_RINGf(v[2]);
                        OUT_RINGf(v[3]);
                        break;
                case 3:
                        OUT_RINGf(v[0]);
                        OUT_RINGf(v[1]);
                        OUT_RINGf(v[2]);
                        OUT_RINGf(1.0);
                        break;
                case 2:
                        OUT_RINGf(v[0]);
                        OUT_RINGf(v[1]);
                        OUT_RINGf(0.0);
                        OUT_RINGf(1.0);
                        break;
                case 1:
                        OUT_RINGf(v[0]);
                        OUT_RINGf(0.0);
                        OUT_RINGf(0.0);
                        OUT_RINGf(1.0);
                        break;
                default:
                        ws->buffer_unmap(ws, vb->buffer);
                        return FALSE;
                }
        }
                break;
        default:
                ws->buffer_unmap(ws, vb->buffer);
                return FALSE;
        }

        ws->buffer_unmap(ws, vb->buffer);

        return TRUE;
}

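/* Build and emit the vertex array state: one NV40TCL_VTXFMT word and one
 * NV40TCL_VTXBUF_ADDRESS relocation per hardware attribute slot used by the
 * active vertex program, plus the index buffer address/format when an index
 * buffer is present.
 */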
static void
nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib,
                       unsigned ib_format)
{
        struct nv40_vertex_program *vp = nv40->vertprog.active;
        struct nouveau_stateobj *vtxbuf, *vtxfmt;
        unsigned inputs, hw, num_hw;
        unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;

        inputs = vp->ir;
        for (hw = 0; hw < 16 && inputs; hw++) {
                if (inputs & (1 << hw)) {
                        num_hw = hw;
                        inputs &= ~(1 << hw);
                }
        }
        num_hw++;

        vtxbuf = so_new(20, 18);
        so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw);
        vtxfmt = so_new(17, 0);
        so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw);

        inputs = vp->ir;
        for (hw = 0; hw < num_hw; hw++) {
                struct pipe_vertex_element *ve;
                struct pipe_vertex_buffer *vb;

                if (!(inputs & (1 << hw))) {
                        so_data(vtxbuf, 0);
                        so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
                        continue;
                }

                ve = &nv40->vtxelt[hw];
                vb = &nv40->vtxbuf[ve->vertex_buffer_index];

                if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) {
                        so_data(vtxbuf, 0);
                        so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
                        continue;
                }

                so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
                         vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                         0, NV40TCL_VTXBUF_ADDRESS_DMA1);
                so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
                                  (nv40_vbo_ncomp(ve->src_format) <<
                                   NV40TCL_VTXFMT_SIZE_SHIFT) |
                                  nv40_vbo_type(ve->src_format)));
        }

        if (ib) {
                so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2);
                so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
                so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
                          0, NV40TCL_IDXBUF_FORMAT_DMA1);
        }

        so_emit(nv40->nvws, vtxfmt);
        so_emit(nv40->nvws, vtxbuf);
        so_ref (vtxbuf, &nv40->so_vtxbuf);
        so_ref (NULL, &vtxbuf);
        so_ref (NULL, &vtxfmt);
}

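/* Emit the current hardware state, refresh the vertex array state if it is
 * dirty or an index buffer is in use, and flush the vertex cache.
 */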
static boolean
nv40_vbo_validate_state(struct nv40_context *nv40,
                        struct pipe_buffer *ib, unsigned ib_format)
{
        unsigned vdn = nv40->dirty & NV40_NEW_ARRAYS;

        nv40_emit_hw_state(nv40);
        if (vdn || ib) {
                nv40_vbo_arrays_update(nv40, ib, ib_format);
                nv40->dirty &= ~NV40_NEW_ARRAYS;
        }

        so_emit_reloc_markers(nv40->nvws, nv40->so_vtxbuf);

        BEGIN_RING(curie, 0x1710, 1);
        OUT_RING (0); /* vtx cache flush */

        return TRUE;
}

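/* Non-indexed drawing.  Vertices are requested with VB_VERTEX_BATCH methods,
 * each covering up to 256 vertices: the leftover (count & 0xff) vertices go
 * out first, then the remaining full batches of 256 in NI packets of at most
 * 2047 methods.
 */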
boolean
nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
                 unsigned count)
{
        struct nv40_context *nv40 = nv40_context(pipe);
        unsigned nr;

        assert(nv40_vbo_validate_state(nv40, NULL, 0));

        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING (nvgl_primitive(mode));

        nr = (count & 0xff);
        if (nr) {
                BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
                OUT_RING (((nr - 1) << 24) | start);
                start += nr;
        }

        nr = count >> 8;
        while (nr) {
                unsigned push = nr > 2047 ? 2047 : nr;

                nr -= push;

                BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
                while (push--) {
                        OUT_RING(((0x100 - 1) << 24) | start);
                        start += 0x100;
                }
        }

        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING (0);

        pipe->flush(pipe, 0);
        return TRUE;
}

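/* Inline 8-bit indices.  The hardware doesn't appear to support an 8-bit
 * index format (see nv40_draw_elements), so the elements are pushed through
 * VB_ELEMENT_U16, two indices packed per 32-bit word.  An odd leading index
 * is sent via VB_ELEMENT_U32 so the remaining count is even.
 */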
static INLINE void
nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
                       unsigned start, unsigned count)
{
        uint8_t *elts = (uint8_t *)ib + start;
        int push, i;

        if (count & 1) {
                BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
                OUT_RING (elts[0]);
                elts++; count--;
        }

        while (count) {
                push = MIN2(count, 2047 * 2);

                BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
                for (i = 0; i < push; i+=2)
                        OUT_RING((elts[i+1] << 16) | elts[i]);

                count -= push;
                elts += push;
        }
}

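/* Inline 16-bit indices, packed two per 32-bit word as above; an odd leading
 * index goes out via VB_ELEMENT_U32 first.
 */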
static INLINE void
nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
                       unsigned start, unsigned count)
{
        uint16_t *elts = (uint16_t *)ib + start;
        int push, i;

        if (count & 1) {
                BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
                OUT_RING (elts[0]);
                elts++; count--;
        }

        while (count) {
                push = MIN2(count, 2047 * 2);

                BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
                for (i = 0; i < push; i+=2)
                        OUT_RING((elts[i+1] << 16) | elts[i]);

                count -= push;
                elts += push;
        }
}

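/* Inline 32-bit indices, pushed one per word in NI packets of up to 2047
 * methods.
 */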
static INLINE void
nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
                       unsigned start, unsigned count)
{
        uint32_t *elts = (uint32_t *)ib + start;
        int push;

        while (count) {
                push = MIN2(count, 2047);

                BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
                OUT_RINGp (elts, push);

                count -= push;
                elts += push;
        }
}

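/* Indexed drawing with the indices pushed inline through the FIFO, used when
 * the hardware can't fetch from a real index buffer (see nv40_draw_elements).
 */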
static boolean
nv40_draw_elements_inline(struct pipe_context *pipe,
                          struct pipe_buffer *ib, unsigned ib_size,
                          unsigned mode, unsigned start, unsigned count)
{
        struct nv40_context *nv40 = nv40_context(pipe);
        struct pipe_winsys *ws = pipe->winsys;
        void *map;

        assert(nv40_vbo_validate_state(nv40, NULL, 0));

        map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
        if (!map)
                assert(0);

        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING (nvgl_primitive(mode));

        switch (ib_size) {
        case 1:
                nv40_draw_elements_u08(nv40, map, start, count);
                break;
        case 2:
                nv40_draw_elements_u16(nv40, map, start, count);
                break;
        case 4:
                nv40_draw_elements_u32(nv40, map, start, count);
                break;
        default:
                assert(0);
                break;
        }

        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING (0);

        ws->buffer_unmap(ws, ib);

        return TRUE;
}

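/* Indexed drawing from a hardware index buffer.  The buffer is bound through
 * NV40TCL_IDXBUF_ADDRESS/FORMAT in nv40_vbo_arrays_update(), and the draw is
 * issued with VB_INDEX_BATCH methods, 256 indices per batch.
 */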
static boolean
nv40_draw_elements_vbo(struct pipe_context *pipe,
                       struct pipe_buffer *ib, unsigned ib_size,
                       unsigned mode, unsigned start, unsigned count)
{
        struct nv40_context *nv40 = nv40_context(pipe);
        unsigned nr, type;

        switch (ib_size) {
        case 2:
                type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
                break;
        case 4:
                type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
                break;
        default:
                assert(0);
                return FALSE;
        }

        assert(nv40_vbo_validate_state(nv40, ib, type));

        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING (nvgl_primitive(mode));

        nr = (count & 0xff);
        if (nr) {
                BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
                OUT_RING (((nr - 1) << 24) | start);
                start += nr;
        }

        nr = count >> 8;
        while (nr) {
                unsigned push = nr > 2047 ? 2047 : nr;

                nr -= push;

                BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
                while (push--) {
                        OUT_RING(((0x100 - 1) << 24) | start);
                        start += 0x100;
                }
        }

        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING (0);

        return TRUE;
}

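/* Top-level indexed draw: choose between pushing indices inline and using a
 * hardware index buffer.
 */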
boolean
nv40_draw_elements(struct pipe_context *pipe,
                   struct pipe_buffer *indexBuffer, unsigned indexSize,
                   unsigned mode, unsigned start, unsigned count)
{
        struct nv40_context *nv40 = nv40_context(pipe);

        /* 0x4497 doesn't support real index buffers, and there doesn't appear
         * to be support on any chipset for 8-bit indices.
         */
        if (nv40->hw->curie->grclass == NV44TCL || indexSize == 1) {
                nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
                                          mode, start, count);
        } else {
                nv40_draw_elements_vbo(pipe, indexBuffer, indexSize,
                                       mode, start, count);
        }

        pipe->flush(pipe, 0);
        return TRUE;
}