r300g: handle DISCARD_WHOLE_RESOURCE for buffers
[mesa.git] / src / gallium / drivers / nvfx / nvfx_push.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_state.h"
3 #include "util/u_inlines.h"
4 #include "util/u_format.h"
5 #include "util/u_split_prim.h"
6 #include "translate/translate.h"
7
8 #include "nvfx_context.h"
9 #include "nvfx_resource.h"
10
/*
 * Per-draw state handed (as the opaque `priv` pointer) to the emit
 * callbacks in this file.
 */
struct push_context {
	/* channel and 3D engine object the callbacks emit methods on */
	struct nouveau_channel *chan;
	struct nouveau_grobj *eng3d;

	/* CPU-visible index data (NULL when unused) and the bias added to indices */
	void *idxbuf;
	int32_t idxbias;

	float edgeflag;
	int edgeflag_attr;

	/*
	 * Length of one translated vertex, and the cap on how many vertices
	 * go into a single NV30_3D_VERTEX_DATA packet.
	 */
	unsigned vertex_length;
	unsigned max_vertices_per_packet;

	/* translate object used to write vertex data into the ring */
	struct translate *translate;
};
26
27 static void
28 emit_edgeflag(void *priv, boolean enabled)
29 {
30 struct push_context* ctx = priv;
31 struct nouveau_grobj *eng3d = ctx->eng3d;
32 struct nouveau_channel *chan = ctx->chan;
33
34 BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1);
35 OUT_RING(chan, enabled ? 1 : 0);
36 }
37
38 static void
39 emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
40 {
41 struct push_context *ctx = priv;
42 struct nouveau_grobj *eng3d = ctx->eng3d;
43 uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
44
45 while(count)
46 {
47 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
48 unsigned length = push * ctx->vertex_length;
49
50 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
51 ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
52 ctx->chan->cur += length;
53
54 count -= push;
55 elts += push;
56 }
57 }
58
59 static void
60 emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
61 {
62 struct push_context *ctx = priv;
63 struct nouveau_grobj *eng3d = ctx->eng3d;
64 uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
65
66 while(count)
67 {
68 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
69 unsigned length = push * ctx->vertex_length;
70
71 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
72 ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
73 ctx->chan->cur += length;
74
75 count -= push;
76 elts += push;
77 }
78 }
79
80 static void
81 emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
82 {
83 struct push_context *ctx = priv;
84 struct nouveau_grobj *eng3d = ctx->eng3d;
85 uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
86
87 while(count)
88 {
89 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
90 unsigned length = push * ctx->vertex_length;
91
92 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
93 ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
94 ctx->chan->cur += length;
95
96 count -= push;
97 elts += push;
98 }
99 }
100
101 static void
102 emit_vertices(void *priv, unsigned start, unsigned count)
103 {
104 struct push_context *ctx = priv;
105 struct nouveau_grobj *eng3d = ctx->eng3d;
106
107 while(count)
108 {
109 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
110 unsigned length = push * ctx->vertex_length;
111
112 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
113 ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
114 ctx->chan->cur += length;
115
116 count -= push;
117 start += push;
118 }
119 }
120
121 static void
122 emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
123 {
124 struct push_context* ctx = priv;
125 struct nouveau_grobj *eng3d = ctx->eng3d;
126 struct nouveau_channel *chan = ctx->chan;
127 unsigned nr = (vc & 0xff);
128 if (nr) {
129 BEGIN_RING(chan, eng3d, reg, 1);
130 OUT_RING (chan, ((nr - 1) << 24) | start);
131 start += nr;
132 }
133
134 nr = vc >> 8;
135 while (nr) {
136 unsigned push = nr > 2047 ? 2047 : nr;
137
138 nr -= push;
139
140 BEGIN_RING_NI(chan, eng3d, reg, push);
141 while (push--) {
142 OUT_RING(chan, ((0x100 - 1) << 24) | start);
143 start += 0x100;
144 }
145 }
146 }
147
148 static void
149 emit_ib_ranges(void* priv, unsigned start, unsigned vc)
150 {
151 emit_ranges(priv, start, vc, NV30_3D_VB_INDEX_BATCH);
152 }
153
154 static void
155 emit_vb_ranges(void* priv, unsigned start, unsigned vc)
156 {
157 emit_ranges(priv, start, vc, NV30_3D_VB_VERTEX_BATCH);
158 }
159
160 static INLINE void
161 emit_elt8(void* priv, unsigned start, unsigned vc)
162 {
163 struct push_context* ctx = priv;
164 struct nouveau_grobj *eng3d = ctx->eng3d;
165 struct nouveau_channel *chan = ctx->chan;
166 uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
167 int idxbias = ctx->idxbias;
168
169 if (vc & 1) {
170 BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
171 OUT_RING (chan, elts[0]);
172 elts++; vc--;
173 }
174
175 while (vc) {
176 unsigned i;
177 unsigned push = MIN2(vc, 2047 * 2);
178
179 BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
180 for (i = 0; i < push; i+=2)
181 OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
182
183 vc -= push;
184 elts += push;
185 }
186 }
187
188 static INLINE void
189 emit_elt16(void* priv, unsigned start, unsigned vc)
190 {
191 struct push_context* ctx = priv;
192 struct nouveau_grobj *eng3d = ctx->eng3d;
193 struct nouveau_channel *chan = ctx->chan;
194 uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
195 int idxbias = ctx->idxbias;
196
197 if (vc & 1) {
198 BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
199 OUT_RING (chan, elts[0]);
200 elts++; vc--;
201 }
202
203 while (vc) {
204 unsigned i;
205 unsigned push = MIN2(vc, 2047 * 2);
206
207 BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
208 for (i = 0; i < push; i+=2)
209 OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
210
211 vc -= push;
212 elts += push;
213 }
214 }
215
216 static INLINE void
217 emit_elt32(void* priv, unsigned start, unsigned vc)
218 {
219 struct push_context* ctx = priv;
220 struct nouveau_grobj *eng3d = ctx->eng3d;
221 struct nouveau_channel *chan = ctx->chan;
222 uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
223 int idxbias = ctx->idxbias;
224
225 while (vc) {
226 unsigned push = MIN2(vc, 2047);
227
228 BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push);
229 if(idxbias)
230 {
231 for(unsigned i = 0; i < push; ++i)
232 OUT_RING(chan, elts[i] + idxbias);
233 }
234 else
235 OUT_RINGp(chan, elts, push);
236
237 vc -= push;
238 elts += push;
239 }
240 }
241
242 void
243 nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
244 {
245 struct nvfx_context *nvfx = nvfx_context(pipe);
246 struct nouveau_channel *chan = nvfx->screen->base.channel;
247 struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
248 struct push_context ctx;
249 struct util_split_prim s;
250 unsigned instances_left = info->instance_count;
251 int vtx_value;
252 unsigned hw_mode = nvgl_primitive(info->mode);
253 int i;
254 struct
255 {
256 uint8_t* map;
257 unsigned step;
258 } per_instance[16];
259 unsigned p_overhead = 64 /* magic fix */
260 + 4 /* begin/end */
261 + 4; /* potential edgeflag enable/disable */
262
263 ctx.chan = nvfx->screen->base.channel;
264 ctx.eng3d = nvfx->screen->eng3d;
265 ctx.translate = nvfx->vtxelt->translate;
266 ctx.idxbuf = NULL;
267 ctx.vertex_length = nvfx->vtxelt->vertex_length;
268 ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
269 ctx.edgeflag = 0.5f;
270 // TODO: figure out if we really want to handle this, and do so in that case
271 ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;
272
273 if(!nvfx->use_vertex_buffers)
274 {
275 for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
276 {
277 struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
278 struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
279 uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
280 if(info->indexed)
281 data += info->index_bias * vb->stride;
282 ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
283 }
284
285 if(ctx.edgeflag_attr < 16)
286 vtx_value = -(ctx.vertex_length + 3); /* vertex data and edgeflag header and value */
287 else
288 {
289 p_overhead += 1; /* initial vertex_data header */
290 vtx_value = -ctx.vertex_length; /* vertex data and edgeflag header and value */
291 }
292
293 if (info->indexed) {
294 // XXX: this case and is broken and probably need a new VTX_ATTR push path
295 if (nvfx->idxbuf.index_size == 1)
296 s.emit = emit_vertices_lookup8;
297 else if (nvfx->idxbuf.index_size == 2)
298 s.emit = emit_vertices_lookup16;
299 else
300 s.emit = emit_vertices_lookup32;
301 } else
302 s.emit = emit_vertices;
303 }
304 else
305 {
306 if(!info->indexed || nvfx->use_index_buffer)
307 {
308 s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
309 p_overhead += 3;
310 vtx_value = 0;
311 }
312 else if (nvfx->idxbuf.index_size == 4)
313 {
314 s.emit = emit_elt32;
315 p_overhead += 1;
316 vtx_value = 8;
317 }
318 else
319 {
320 s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
321 p_overhead += 3;
322 vtx_value = 7;
323 }
324 }
325
326 ctx.idxbias = info->index_bias;
327 if(nvfx->use_vertex_buffers)
328 ctx.idxbias -= nvfx->base_vertex;
329
330 /* map index buffer, if present */
331 if (info->indexed && !nvfx->use_index_buffer)
332 ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
333
334 s.priv = &ctx;
335 s.edge = emit_edgeflag;
336
337 for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
338 {
339 struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
340 struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
341 float v[4];
342 per_instance[i].step = info->start_instance % ve->instance_divisor;
343 per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;
344
345 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
346
347 nvfx_emit_vtx_attr(chan, eng3d,
348 nvfx->vtxelt->per_instance[i].base.idx, v,
349 nvfx->vtxelt->per_instance[i].base.ncomp);
350 }
351
352 /* per-instance loop */
353 while (instances_left--) {
354 int max_verts;
355 boolean done;
356
357 util_split_prim_init(&s, info->mode, info->start, info->count);
358 nvfx_state_emit(nvfx);
359 for(;;) {
360 max_verts = AVAIL_RING(chan);
361 max_verts -= p_overhead;
362
363 /* if vtx_value < 0, each vertex is -vtx_value words long
364 * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
365 */
366 if(vtx_value < 0)
367 {
368 max_verts /= -vtx_value;
369 max_verts -= (max_verts >> 10); /* vertex data headers */
370 }
371 else
372 {
373 if(max_verts >= (1 << 23)) /* avoid overflow here */
374 max_verts = (1 << 23);
375 max_verts = (max_verts * 255) >> vtx_value;
376 }
377
378 //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);
379
380 if(max_verts >= 16)
381 {
382 /* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */
383 /* this seems to cause issues on nv3x, and also be unneeded there */
384 if(nvfx->is_nv4x)
385 {
386 int i;
387 for(i = 0; i < 32; ++i)
388 {
389 BEGIN_RING(chan, eng3d,
390 0x1dac, 1);
391 OUT_RING(chan, 0);
392 }
393 }
394
395 BEGIN_RING(chan, eng3d,
396 NV30_3D_VERTEX_BEGIN_END, 1);
397 OUT_RING(chan, hw_mode);
398 done = util_split_prim_next(&s, max_verts);
399 BEGIN_RING(chan, eng3d,
400 NV30_3D_VERTEX_BEGIN_END, 1);
401 OUT_RING(chan, 0);
402
403 if(done)
404 break;
405 }
406
407 FIRE_RING(chan);
408 nvfx_state_emit(nvfx);
409 }
410
411 /* set data for the next instance, if any changed */
412 for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
413 {
414 struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
415 struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
416
417 if(++per_instance[i].step == ve->instance_divisor)
418 {
419 float v[4];
420 per_instance[i].map += vb->stride;
421 per_instance[i].step = 0;
422
423 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
424 nvfx_emit_vtx_attr(chan, eng3d,
425 nvfx->vtxelt->per_instance[i].base.idx,
426 v,
427 nvfx->vtxelt->per_instance[i].base.ncomp);
428 }
429 }
430 }
431 }