Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / drivers / nv50 / nv50_push.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_state.h"
3 #include "util/u_inlines.h"
4 #include "util/u_format.h"
5 #include "util/u_split_prim.h"
6
7 #include "nv50_context.h"
8 #include "nv50_resource.h"
9
/* State for pushing vertex data through the FIFO (immediate mode) instead
 * of letting the hardware fetch it: per-attribute source pointers plus the
 * callback that emits one attribute of one vertex. */
struct push_context {
   struct nv50_context *nv50;

   unsigned vtx_size;   /* size of one full vertex in 32-bit words */

   void *idxbuf;        /* CPU-visible index buffer, NULL for non-indexed draws */
   int32_t idxbias;     /* bias added to every fetched index */
   unsigned idxsize;    /* bytes per index: 1, 2 or 4 */

   float edgeflag;      /* last edgeflag value written to the hardware */
   int edgeflag_attr;   /* attribute carrying the edgeflag; >= 16 means none
                         * (see the guard in emit_vertex) */

   struct {
      void *map;        /* data of this attribute for vertex 0 (current instance) */
      unsigned stride;  /* byte stride between consecutive vertices */
      unsigned divisor; /* instance divisor; 0 for per-vertex data */
      unsigned step;    /* instances consumed since the map was last advanced */
      void (*push)(struct nouveau_channel *, void *); /* emit one attribute */
   } attr[16];
   unsigned attr_nr;    /* number of valid entries in attr[] */
};
31
/* Push one 32-bit attribute component. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;

   OUT_RING(chan, src[0]);
}
39
/* Push two 32-bit attribute components. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;
   int i;

   for (i = 0; i < 2; i++)
      OUT_RING(chan, src[i]);
}
48
/* Push three 32-bit attribute components. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;
   int i;

   for (i = 0; i < 3; i++)
      OUT_RING(chan, src[i]);
}
58
/* Push four 32-bit attribute components. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;
   int i;

   for (i = 0; i < 4; i++)
      OUT_RING(chan, src[i]);
}
69
/* Push one 16-bit component (zero-extended into the low half of a word). */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
   const uint16_t *src = data;

   OUT_RING(chan, src[0]);
}
77
/* Push three 16-bit components: the first two packed into one word
 * (little-endian order), the third alone in the next word. */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
   const uint16_t *src = data;
   uint32_t packed = src[0] | ((uint32_t)src[1] << 16);

   OUT_RING(chan, packed);
   OUT_RING(chan, src[2]);
}
86
/* Push one 8-bit component (zero-extended into the low byte of a word). */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
   const uint8_t *src = data;

   OUT_RING(chan, src[0]);
}
94
/* Push three 8-bit components packed into the low 24 bits of one word
 * (little-endian order). */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
   const uint8_t *src = data;
   uint32_t packed =
      src[0] | ((uint32_t)src[1] << 8) | ((uint32_t)src[2] << 16);

   OUT_RING(chan, packed);
}
102
/* Emit one complete vertex (index n into the mapped attribute arrays)
 * as an inline VERTEX_DATA packet, updating the hardware edgeflag state
 * first if this vertex's edgeflag differs from the last one written. */
static INLINE void
emit_vertex(struct push_context *ctx, unsigned n)
{
   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   int i;

   /* edgeflag_attr < 16 means one of the attributes carries the edgeflag;
    * it must be toggled BEFORE the vertex it applies to is pushed. */
   if (ctx->edgeflag_attr < 16) {
      float *edgeflag = (float *)
         ((uint8_t *)ctx->attr[ctx->edgeflag_attr].map +
          ctx->attr[ctx->edgeflag_attr].stride * n);

      /* Only emit the state change when the value actually flips;
       * ctx->edgeflag caches the last value written (starts at 0.5f so the
       * first vertex always emits). */
      if (*edgeflag != ctx->edgeflag) {
         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
         OUT_RING (chan, *edgeflag ? 1 : 0);
         ctx->edgeflag = *edgeflag;
      }
   }

   /* One non-incrementing packet holding all attributes of this vertex;
    * each attr's push() emits exactly the words counted in vtx_size. */
   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
   for (i = 0; i < ctx->attr_nr; i++)
      ctx->attr[i].push(chan,
         (uint8_t *)ctx->attr[i].map + ctx->attr[i].stride * n);
}
127
128 static void
129 emit_edgeflag(void *priv, boolean enabled)
130 {
131 struct push_context *ctx = priv;
132 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
133 struct nouveau_channel *chan = tesla->channel;
134
135 BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
136 OUT_RING (chan, enabled ? 1 : 0);
137 }
138
139 static void
140 emit_elt08(void *priv, unsigned start, unsigned count)
141 {
142 struct push_context *ctx = priv;
143 uint8_t *idxbuf = ctx->idxbuf;
144
145 while (count--)
146 emit_vertex(ctx, idxbuf[start++]);
147 }
148
149 static void
150 emit_elt08_biased(void *priv, unsigned start, unsigned count)
151 {
152 struct push_context *ctx = priv;
153 uint8_t *idxbuf = ctx->idxbuf;
154
155 while (count--)
156 emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
157 }
158
159 static void
160 emit_elt16(void *priv, unsigned start, unsigned count)
161 {
162 struct push_context *ctx = priv;
163 uint16_t *idxbuf = ctx->idxbuf;
164
165 while (count--)
166 emit_vertex(ctx, idxbuf[start++]);
167 }
168
169 static void
170 emit_elt16_biased(void *priv, unsigned start, unsigned count)
171 {
172 struct push_context *ctx = priv;
173 uint16_t *idxbuf = ctx->idxbuf;
174
175 while (count--)
176 emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
177 }
178
179 static void
180 emit_elt32(void *priv, unsigned start, unsigned count)
181 {
182 struct push_context *ctx = priv;
183 uint32_t *idxbuf = ctx->idxbuf;
184
185 while (count--)
186 emit_vertex(ctx, idxbuf[start++]);
187 }
188
189 static void
190 emit_elt32_biased(void *priv, unsigned start, unsigned count)
191 {
192 struct push_context *ctx = priv;
193 uint32_t *idxbuf = ctx->idxbuf;
194
195 while (count--)
196 emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
197 }
198
/* Non-indexed path: emit vertices start .. start + count - 1 in order. */
static void
emit_verts(void *priv, unsigned start, unsigned count)
{
   unsigned i;

   for (i = 0; i < count; i++)
      emit_vertex(priv, start + i);
}
205
/* Draw by pushing vertex data inline through the FIFO instead of having the
 * hardware fetch it — used for the vertex buffers flagged in nv50->vbo_fifo.
 *
 * idxbuf/idxsize/idxbias describe an optional index buffer (NULL for
 * non-indexed draws); mode/start/count are the primitive type and vertex
 * range; i_start/i_count are the instance range.
 *
 * Returns early (with an assert in debug builds) if a buffer cannot be
 * mapped or if state validation fails mid-draw.
 */
void
nv50_push_elements_instanced(struct pipe_context *pipe,
                             struct pipe_resource *idxbuf,
                             unsigned idxsize, int idxbias,
                             unsigned mode, unsigned start, unsigned count,
                             unsigned i_start, unsigned i_count)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   struct push_context ctx;
   /* Fixed per-primitive ring cost, in words. */
   const unsigned p_overhead = 4 + /* begin/end */
                               4; /* potential edgeflag enable/disable */
   /* Worst-case per-vertex ring cost on top of the attribute data. */
   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
                               2; /* potential edgeflag modification */
   struct util_split_prim s;
   unsigned vtx_size;
   boolean nzi = FALSE; /* FALSE only for the first instance (bit 28 below) */
   int i;

   ctx.nv50 = nv50;
   ctx.attr_nr = 0;
   ctx.idxbuf = NULL;
   ctx.vtx_size = 0;
   ctx.edgeflag = 0.5f; /* sentinel: can't equal any real flag, forces first write */
   ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag;

   /* map vertex buffers, determine vertex size */
   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
      struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
      unsigned size, nr_components, n;

      /* Only attributes that must go through the FIFO are pushed here. */
      if (!(nv50->vbo_fifo & (1 << i)))
         continue;
      n = ctx.attr_nr++;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
      /* NOTE(review): the pointer is kept and used after unmap — presumably
       * nouveau BO maps stay valid; confirm against libdrm-nouveau semantics. */
      nouveau_bo_unmap(bo);

      ctx.attr[n].stride = vb->stride;
      ctx.attr[n].divisor = ve->instance_divisor;
      if (ctx.attr[n].divisor) {
         /* Pre-advance instanced attributes to the first drawn instance. */
         ctx.attr[n].step = i_start % ve->instance_divisor;
         ctx.attr[n].map = (uint8_t *)ctx.attr[n].map + i_start * vb->stride;
      }

      /* Select the push callback from component size x component count;
       * sub-word components are read packed (e.g. two 8-bit components are
       * fetched as one 16-bit read).  ctx.vtx_size accumulates the number
       * of 32-bit words each vertex occupies in the VERTEX_DATA packet. */
      size = util_format_get_component_bits(ve->src_format,
                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
      nr_components = util_format_get_nr_components(ve->src_format);
      switch (size) {
      case 8:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b08_1; break;
         case 2: ctx.attr[n].push = emit_b16_1; break;
         case 3: ctx.attr[n].push = emit_b08_3; break;
         case 4: ctx.attr[n].push = emit_b32_1; break;
         }
         ctx.vtx_size++;
         break;
      case 16:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b16_1; break;
         case 2: ctx.attr[n].push = emit_b32_1; break;
         case 3: ctx.attr[n].push = emit_b16_3; break;
         case 4: ctx.attr[n].push = emit_b32_2; break;
         }
         ctx.vtx_size += (nr_components + 1) >> 1; /* two 16-bit comps per word */
         break;
      case 32:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b32_1; break;
         case 2: ctx.attr[n].push = emit_b32_2; break;
         case 3: ctx.attr[n].push = emit_b32_3; break;
         case 4: ctx.attr[n].push = emit_b32_4; break;
         }
         ctx.vtx_size += nr_components;
         break;
      default:
         assert(0);
         return;
      }
   }
   vtx_size = ctx.vtx_size + v_overhead;

   /* map index buffer, if present */
   if (idxbuf) {
      struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.idxbuf = bo->map;
      ctx.idxbias = idxbias;
      ctx.idxsize = idxsize;
      nouveau_bo_unmap(bo);
   }

   /* Pick the emit callback for util_split_prim based on index width/bias. */
   s.priv = &ctx;
   s.edge = emit_edgeflag;
   if (idxbuf) {
      if (idxsize == 1)
         s.emit = idxbias ? emit_elt08_biased : emit_elt08;
      else
      if (idxsize == 2)
         s.emit = idxbias ? emit_elt16_biased : emit_elt16;
      else
         s.emit = idxbias ? emit_elt32_biased : emit_elt32;
   } else
      s.emit = emit_verts;

   /* per-instance loop */
   /* NOTE(review): writes i_start into the aux constbuf at offset 24 words —
    * presumably the shader-visible instance-ID base; confirm against the
    * shader code that reads NV50_CB_AUX. */
   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
   OUT_RING  (chan, i_start);
   while (i_count--) {
      unsigned max_verts;
      boolean done;

      /* Advance instanced attributes whose divisor has elapsed. */
      for (i = 0; i < ctx.attr_nr; i++) {
         if (!ctx.attr[i].divisor ||
              ctx.attr[i].divisor != ++ctx.attr[i].step)
            continue;
         ctx.attr[i].step = 0;
         ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
      }

      /* Split the primitive so each chunk fits in the available ring space. */
      util_split_prim_init(&s, mode, start, count);
      do {
         /* Ensure room for at least 6 vertices per chunk before starting;
          * otherwise flush and revalidate state. */
         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
            FIRE_RING(chan);
            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
               assert(0);
               return;
            }
         }

         max_verts = AVAIL_RING(chan);
         max_verts -= p_overhead;
         max_verts /= vtx_size;

         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
         /* bit 28 marks a non-zeroth instance for every instance after the
          * first. */
         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
         done = util_split_prim_next(&s, max_verts);
         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
         OUT_RING  (chan, 0);
      } while (!done);

      nzi = TRUE;
   }
}
362 }