/* mesa.git: src/gallium/drivers/nv50/nv50_push.c */
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"

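/* State for pushing vertex data inline through the command stream instead of
 * letting the GPU fetch it from vertex buffers: per-attribute source pointer,
 * stride, instance divisor/step, and the callback that writes one attribute
 * into the ring.
 */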
struct push_context {
   struct nv50_context *nv50;

   unsigned vtx_size;

   void *idxbuf;
   int32_t idxbias;
   unsigned idxsize;

   float edgeflag;
   int edgeflag_attr;

   struct {
      void *map;
      unsigned stride;
      unsigned divisor;
      unsigned step;
      void (*push)(struct nouveau_channel *, void *);
   } attr[16];
   unsigned attr_nr;
};

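/* emit_bXX_N: write one attribute of N components with XX-bit component size
 * into the command stream, packed into 32-bit words.
 */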
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
}

static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
   OUT_RING(chan, v[1]);
}

static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
   OUT_RING(chan, v[1]);
   OUT_RING(chan, v[2]);
}

static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
   OUT_RING(chan, v[1]);
   OUT_RING(chan, v[2]);
   OUT_RING(chan, v[3]);
}

static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
   uint16_t *v = data;

   OUT_RING(chan, v[0]);
}

static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
   uint16_t *v = data;

   OUT_RING(chan, (v[1] << 16) | v[0]);
   OUT_RING(chan, v[2]);
}

static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
   uint8_t *v = data;

   OUT_RING(chan, v[0]);
}

static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
   uint8_t *v = data;

   OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
}

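/* Emit a single vertex: update the hardware edgeflag state if the vertex
 * shader reads it and its value changed, then push all enabled attributes
 * as inline VERTEX_DATA.
 */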
static INLINE void
emit_vertex(struct push_context *ctx, unsigned n)
{
   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   int i;

   if (ctx->edgeflag_attr < 16) {
      float *edgeflag = ctx->attr[ctx->edgeflag_attr].map +
         ctx->attr[ctx->edgeflag_attr].stride * n;

      if (*edgeflag != ctx->edgeflag) {
         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
         OUT_RING (chan, *edgeflag ? 1 : 0);
         ctx->edgeflag = *edgeflag;
      }
   }

   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
   for (i = 0; i < ctx->attr_nr; i++)
      ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
}

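/* u_split_prim edge callback: set the hardware edgeflag state directly. */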
static void
emit_edgeflag(void *priv, boolean enabled)
{
   struct push_context *ctx = priv;
   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;

   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
   OUT_RING (chan, enabled ? 1 : 0);
}

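/* u_split_prim emit callbacks for indexed draws: fetch 8-, 16- or 32-bit
 * indices (optionally applying the index bias) and emit the referenced
 * vertices.
 */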
static void
emit_elt08(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint8_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++]);
}

static void
emit_elt08_biased(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint8_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
}

static void
emit_elt16(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint16_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++]);
}

static void
emit_elt16_biased(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint16_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
}

static void
emit_elt32(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint32_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++]);
}

static void
emit_elt32_biased(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint32_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
}

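/* u_split_prim emit callback for non-indexed draws. */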
static void
emit_verts(void *priv, unsigned start, unsigned count)
{
   while (count--)
      emit_vertex(priv, start++);
}

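/* Draw by pushing vertex data inline through the FIFO (used when the GPU
 * cannot fetch the vertex buffers directly): map the vertex and index
 * buffers, pick per-attribute emit callbacks based on the vertex format,
 * then emit each instance, splitting primitives to fit the available ring
 * space.
 */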
void
nv50_push_elements_instanced(struct pipe_context *pipe,
                             struct pipe_resource *idxbuf,
                             unsigned idxsize, int idxbias,
                             unsigned mode, unsigned start, unsigned count,
                             unsigned i_start, unsigned i_count)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   struct push_context ctx;
   const unsigned p_overhead = 4 + /* begin/end */
                               4; /* potential edgeflag enable/disable */
   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
                               2; /* potential edgeflag modification */
   struct u_split_prim s;
   unsigned vtx_size;
   boolean nzi = FALSE;
   int i;

   ctx.nv50 = nv50;
   ctx.attr_nr = 0;
   ctx.idxbuf = NULL;
   ctx.vtx_size = 0;
   ctx.edgeflag = 0.5f;
   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;

   /* map vertex buffers, determine vertex size */
   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
      struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
      unsigned size, nr_components, n;

      if (!(nv50->vbo_fifo & (1 << i)))
         continue;
      n = ctx.attr_nr++;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
      nouveau_bo_unmap(bo);

      ctx.attr[n].stride = vb->stride;
      ctx.attr[n].divisor = ve->instance_divisor;
      if (ctx.attr[n].divisor) {
         ctx.attr[n].step = i_start % ve->instance_divisor;
         ctx.attr[n].map += i_start * vb->stride;
      }

      size = util_format_get_component_bits(ve->src_format,
                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
      nr_components = util_format_get_nr_components(ve->src_format);
      switch (size) {
      case 8:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b08_1; break;
         case 2: ctx.attr[n].push = emit_b16_1; break;
         case 3: ctx.attr[n].push = emit_b08_3; break;
         case 4: ctx.attr[n].push = emit_b32_1; break;
         }
         ctx.vtx_size++;
         break;
      case 16:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b16_1; break;
         case 2: ctx.attr[n].push = emit_b32_1; break;
         case 3: ctx.attr[n].push = emit_b16_3; break;
         case 4: ctx.attr[n].push = emit_b32_2; break;
         }
         ctx.vtx_size += (nr_components + 1) >> 1;
         break;
      case 32:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b32_1; break;
         case 2: ctx.attr[n].push = emit_b32_2; break;
         case 3: ctx.attr[n].push = emit_b32_3; break;
         case 4: ctx.attr[n].push = emit_b32_4; break;
         }
         ctx.vtx_size += nr_components;
         break;
      default:
         assert(0);
         return;
      }
   }
   vtx_size = ctx.vtx_size + v_overhead;

   /* map index buffer, if present */
   if (idxbuf) {
      struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.idxbuf = bo->map;
      ctx.idxbias = idxbias;
      ctx.idxsize = idxsize;
      nouveau_bo_unmap(bo);
   }

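   /* Hook up the u_split_prim callbacks: edgeflag toggling and vertex
    * emission matching the index size (or direct emission if non-indexed).
    */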
   s.priv = &ctx;
   s.edge = emit_edgeflag;
   if (idxbuf) {
      if (idxsize == 1)
         s.emit = idxbias ? emit_elt08_biased : emit_elt08;
      else
      if (idxsize == 2)
         s.emit = idxbias ? emit_elt16_biased : emit_elt16;
      else
         s.emit = idxbias ? emit_elt32_biased : emit_elt32;
   } else
      s.emit = emit_verts;

   /* per-instance loop */
   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
   OUT_RING (chan, NV50_CB_AUX | (24 << 8));
   OUT_RING (chan, i_start);
   while (i_count--) {
      unsigned max_verts;
      boolean done;

      for (i = 0; i < ctx.attr_nr; i++) {
         if (!ctx.attr[i].divisor ||
              ctx.attr[i].divisor != ++ctx.attr[i].step)
            continue;
         ctx.attr[i].step = 0;
         ctx.attr[i].map += ctx.attr[i].stride;
      }

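      /* Split the primitive into chunks that fit into the space left in the
       * ring; flush and revalidate state when the ring runs low.
       */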
      u_split_prim_init(&s, mode, start, count);
      do {
         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
            FIRE_RING(chan);
            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
               assert(0);
               return;
            }
         }

         max_verts = AVAIL_RING(chan);
         max_verts -= p_overhead;
         max_verts /= vtx_size;

         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
         OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
         done = u_split_prim_next(&s, max_verts);
         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
         OUT_RING (chan, 0);
      } while (!done);

      nzi = TRUE;
   }
}