Merge branch '7.8'
[mesa.git] / src / gallium / drivers / nv50 / nv50_push.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_state.h"
3 #include "util/u_inlines.h"
4 #include "util/u_format.h"
5
6 #include "nouveau/nouveau_util.h"
7 #include "nv50_context.h"
8 #include "nv50_resource.h"
9
/*
 * State shared by the vertex emit callbacks for one call of
 * nv50_push_elements_instanced().
 */
struct push_context {
   /* context the draw was issued on */
   struct nv50_context *nv50;

   /* data word count passed to each VERTEX_DATA packet */
   unsigned int vtx_size;

   /* CPU pointer to the index buffer, NULL for non-indexed draws */
   void *idxbuf;
   /* index size; 1 and 2 select 8/16-bit fetch, anything else 32-bit */
   unsigned int idxsize;

   /* edge flag value most recently written by emit_vertex() */
   float edgeflag;
   /* slot in attr[] holding the edge flag; values >= 16 mean "none" */
   int edgeflag_attr;

   /* one entry per vertex element that is pushed through the FIFO */
   struct {
      /* CPU pointer to the element's data for vertex 0 */
      void *map;
      /* distance between consecutive elements */
      unsigned int stride;
      /* instance divisor, 0 for ordinary per-vertex data */
      unsigned int divisor;
      /* instance counter, reset whenever it reaches divisor */
      unsigned int step;
      /* writes one element's data words to the channel */
      void (*push)(struct nouveau_channel *, void *);
   } attr[16];
   /* number of attr[] entries in use */
   unsigned int attr_nr;
};
30
/* Push a single 32-bit word read from data. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;

   OUT_RING(chan, *src);
}
38
/* Push two consecutive 32-bit words read from data. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;
   unsigned k;

   for (k = 0; k < 2; k++) {
      OUT_RING(chan, src[k]);
   }
}
47
/* Push three consecutive 32-bit words read from data. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;
   unsigned k;

   for (k = 0; k < 3; k++) {
      OUT_RING(chan, src[k]);
   }
}
57
/* Push four consecutive 32-bit words read from data. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
   const uint32_t *src = data;
   unsigned k;

   for (k = 0; k < 4; k++) {
      OUT_RING(chan, src[k]);
   }
}
68
/* Push one 16-bit value from data, zero-extended into a single word. */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
   const uint16_t *src = data;

   OUT_RING(chan, *src);
}
76
/*
 * Push three 16-bit values from data as two words: the first word holds
 * v[0] in bits 0-15 and v[1] in bits 16-31, the second word holds v[2].
 */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
   const uint16_t *v = data;

   /* Widen before shifting: a uint16_t promotes to signed int, and shifting
    * a value >= 0x8000 left by 16 would overflow it.
    */
   OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]);
   OUT_RING(chan, v[2]);
}
85
/* Push one 8-bit value from data, zero-extended into a single word. */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
   const uint8_t *src = data;

   OUT_RING(chan, *src);
}
93
/*
 * Push three 8-bit values from data packed into one word:
 * byte 0 in bits 0-7, byte 1 in bits 8-15, byte 2 in bits 16-23.
 */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
   const uint8_t *src = data;
   const uint32_t packed = src[0] | (src[1] << 8) | (src[2] << 16);

   OUT_RING(chan, packed);
}
101
102 static INLINE void
103 emit_vertex(struct push_context *ctx, unsigned n)
104 {
105 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
106 struct nouveau_channel *chan = tesla->channel;
107 int i;
108
109 if (ctx->edgeflag_attr < 16) {
110 float *edgeflag = ctx->attr[ctx->edgeflag_attr].map +
111 ctx->attr[ctx->edgeflag_attr].stride * n;
112
113 if (*edgeflag != ctx->edgeflag) {
114 BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
115 OUT_RING (chan, *edgeflag ? 1 : 0);
116 ctx->edgeflag = *edgeflag;
117 }
118 }
119
120 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
121 for (i = 0; i < ctx->attr_nr; i++)
122 ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
123 }
124
125 static void
126 emit_edgeflag(void *priv, boolean enabled)
127 {
128 struct push_context *ctx = priv;
129 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
130 struct nouveau_channel *chan = tesla->channel;
131
132 BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
133 OUT_RING (chan, enabled ? 1 : 0);
134 }
135
136 static void
137 emit_elt08(void *priv, unsigned start, unsigned count)
138 {
139 struct push_context *ctx = priv;
140 uint8_t *idxbuf = ctx->idxbuf;
141
142 while (count--)
143 emit_vertex(ctx, idxbuf[start++]);
144 }
145
146 static void
147 emit_elt16(void *priv, unsigned start, unsigned count)
148 {
149 struct push_context *ctx = priv;
150 uint16_t *idxbuf = ctx->idxbuf;
151
152 while (count--)
153 emit_vertex(ctx, idxbuf[start++]);
154 }
155
156 static void
157 emit_elt32(void *priv, unsigned start, unsigned count)
158 {
159 struct push_context *ctx = priv;
160 uint32_t *idxbuf = ctx->idxbuf;
161
162 while (count--)
163 emit_vertex(ctx, idxbuf[start++]);
164 }
165
/*
 * Emit count consecutive vertices starting with vertex number start
 * (non-indexed draws).  priv is the struct push_context of the draw.
 */
static void
emit_verts(void *priv, unsigned start, unsigned count)
{
   struct push_context *push = priv;
   unsigned k;

   for (k = 0; k < count; k++) {
      emit_vertex(push, start + k);
   }
}
172
173 void
174 nv50_push_elements_instanced(struct pipe_context *pipe,
175 struct pipe_resource *idxbuf, unsigned idxsize,
176 unsigned mode, unsigned start, unsigned count,
177 unsigned i_start, unsigned i_count)
178 {
179 struct nv50_context *nv50 = nv50_context(pipe);
180 struct nouveau_grobj *tesla = nv50->screen->tesla;
181 struct nouveau_channel *chan = tesla->channel;
182 struct push_context ctx;
183 const unsigned p_overhead = 4 + /* begin/end */
184 4; /* potential edgeflag enable/disable */
185 const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
186 2; /* potential edgeflag modification */
187 struct u_split_prim s;
188 unsigned vtx_size;
189 boolean nzi = FALSE;
190 int i;
191
192 ctx.nv50 = nv50;
193 ctx.attr_nr = 0;
194 ctx.idxbuf = NULL;
195 ctx.vtx_size = 0;
196 ctx.edgeflag = 0.5f;
197 ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
198
199 /* map vertex buffers, determine vertex size */
200 for (i = 0; i < nv50->vtxelt->num_elements; i++) {
201 struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
202 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
203 struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
204 unsigned size, nr_components, n;
205
206 if (!(nv50->vbo_fifo & (1 << i)))
207 continue;
208 n = ctx.attr_nr++;
209
210 if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
211 assert(bo->map);
212 return;
213 }
214 ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
215 nouveau_bo_unmap(bo);
216
217 ctx.attr[n].stride = vb->stride;
218 ctx.attr[n].divisor = ve->instance_divisor;
219 if (ctx.attr[n].divisor) {
220 ctx.attr[n].step = i_start % ve->instance_divisor;
221 ctx.attr[n].map += i_start * vb->stride;
222 }
223
224 size = util_format_get_component_bits(ve->src_format,
225 UTIL_FORMAT_COLORSPACE_RGB, 0);
226 nr_components = util_format_get_nr_components(ve->src_format);
227 switch (size) {
228 case 8:
229 switch (nr_components) {
230 case 1: ctx.attr[n].push = emit_b08_1; break;
231 case 2: ctx.attr[n].push = emit_b16_1; break;
232 case 3: ctx.attr[n].push = emit_b08_3; break;
233 case 4: ctx.attr[n].push = emit_b32_1; break;
234 }
235 ctx.vtx_size++;
236 break;
237 case 16:
238 switch (nr_components) {
239 case 1: ctx.attr[n].push = emit_b16_1; break;
240 case 2: ctx.attr[n].push = emit_b32_1; break;
241 case 3: ctx.attr[n].push = emit_b16_3; break;
242 case 4: ctx.attr[n].push = emit_b32_2; break;
243 }
244 ctx.vtx_size += (nr_components + 1) >> 1;
245 break;
246 case 32:
247 switch (nr_components) {
248 case 1: ctx.attr[n].push = emit_b32_1; break;
249 case 2: ctx.attr[n].push = emit_b32_2; break;
250 case 3: ctx.attr[n].push = emit_b32_3; break;
251 case 4: ctx.attr[n].push = emit_b32_4; break;
252 }
253 ctx.vtx_size += nr_components;
254 break;
255 default:
256 assert(0);
257 return;
258 }
259 }
260 vtx_size = ctx.vtx_size + v_overhead;
261
262 /* map index buffer, if present */
263 if (idxbuf) {
264 struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;
265
266 if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
267 assert(bo->map);
268 return;
269 }
270 ctx.idxbuf = bo->map;
271 ctx.idxsize = idxsize;
272 nouveau_bo_unmap(bo);
273 }
274
275 s.priv = &ctx;
276 s.edge = emit_edgeflag;
277 if (idxbuf) {
278 if (idxsize == 1)
279 s.emit = emit_elt08;
280 else
281 if (idxsize == 2)
282 s.emit = emit_elt16;
283 else
284 s.emit = emit_elt32;
285 } else
286 s.emit = emit_verts;
287
288 /* per-instance loop */
289 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
290 OUT_RING (chan, NV50_CB_AUX | (24 << 8));
291 OUT_RING (chan, i_start);
292 while (i_count--) {
293 unsigned max_verts;
294 boolean done;
295
296 for (i = 0; i < ctx.attr_nr; i++) {
297 if (!ctx.attr[i].divisor ||
298 ctx.attr[i].divisor != ++ctx.attr[i].step)
299 continue;
300 ctx.attr[i].step = 0;
301 ctx.attr[i].map += ctx.attr[i].stride;
302 }
303
304 u_split_prim_init(&s, mode, start, count);
305 do {
306 if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
307 FIRE_RING(chan);
308 if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
309 assert(0);
310 return;
311 }
312 }
313
314 max_verts = AVAIL_RING(chan);
315 max_verts -= p_overhead;
316 max_verts /= vtx_size;
317
318 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
319 OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
320 done = u_split_prim_next(&s, max_verts);
321 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
322 OUT_RING (chan, 0);
323 } while (!done);
324
325 nzi = TRUE;
326 }
327 }