From 0d6e3dd84d21a9c0a60cd64304d39d8928f37238 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 18 Jan 2010 19:52:34 +0100 Subject: [PATCH] nv50: make instanced drawing work with edge flags And fix some obvious mistakes introduced in the previous instancing commit. --- src/gallium/drivers/nv50/nv50_vbo.c | 207 ++++++++++++++++++++++------ 1 file changed, 162 insertions(+), 45 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 32386294616..b9cf0754b77 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -40,6 +40,8 @@ nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); static boolean nv50_push_arrays(struct nv50_context *, unsigned, unsigned); +#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) + static INLINE unsigned nv50_prim(unsigned mode) { @@ -171,7 +173,7 @@ nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i) struct pipe_buffer *buf = nscreen->strm_vbuf[i]; struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; uint8_t *src; - unsigned size = MAX2(vb->buffer->size, 4096); + unsigned size = align(vb->buffer->size, 4096); if (buf && buf->size < size) pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL); @@ -215,6 +217,74 @@ nv50_upload_user_vbufs(struct nv50_context *nv50) } } +static void +nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + float v[4]; + + util_format_read_4f(nv50->vtxelt[i].src_format, + v, 0, data, 0, 0, 0, 1, 1); + + switch (nv50->vtxelt[i].nr_components) { + case 4: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4); + OUT_RING (chan, v[0]); + OUT_RING (chan, v[1]); + OUT_RING (chan, v[2]); + OUT_RING (chan, v[3]); + break; + case 3: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 4); + OUT_RING (chan, v[0]); + OUT_RING (chan, v[1]); + OUT_RING (chan, v[2]); + break; + case 2: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 4); + OUT_RING (chan, v[0]); + OUT_RING (chan, v[1]); + break; + case 1: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 4); + OUT_RING (chan, v[0]); + break; + default: + assert(0); + break; + } +} + +static unsigned +init_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b, count = 0; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + if (!bo->map) + nouveau_bo_map(bo, NOUVEAU_BO_RD); + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } + + return count; +} + static unsigned init_per_instance_arrays(struct nv50_context *nv50, unsigned startInstance, @@ -227,6 +297,10 @@ init_per_instance_arrays(struct nv50_context *nv50, unsigned i, b, count = 0; const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + if (nv50->vbo_fifo) + return init_per_instance_arrays_immd(nv50, startInstance, + pos, step); + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); for (i = 0; i < nv50->vtxelt_nr; ++i) { @@ -248,11 +322,11 @@ init_per_instance_arrays(struct nv50_context *nv50, bo = nouveau_bo(nv50->vtxbuf[b].buffer); so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); - so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); } - if (count) { + if (count && startInstance) { so_ref (so, &nv50->state.instbuf); /* for flush notify */ so_emit(chan, nv50->state.instbuf); } @@ -261,6 +335,28 @@ init_per_instance_arrays(struct nv50_context *nv50, return count; } +static void +step_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + if (++step[i] != nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } +} + static void step_per_instance_arrays(struct nv50_context *nv50, unsigned pos[16], unsigned step[16]) @@ -272,6 +368,11 @@ step_per_instance_arrays(struct nv50_context *nv50, unsigned i, b; const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + if (nv50->vbo_fifo) { + step_per_instance_arrays_immd(nv50, pos, step); + return; + } + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); for (i = 0; i < nv50->vtxelt_nr; ++i) { @@ -287,8 +388,8 @@ step_per_instance_arrays(struct nv50_context *nv50, bo = nouveau_bo(nv50->vtxbuf[b].buffer); so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); - so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); } so_ref (so, &nv50->state.instbuf); /* for flush notify */ @@ -297,6 +398,16 @@ step_per_instance_arrays(struct nv50_context *nv50, so_emit(chan, nv50->state.instbuf); } +static INLINE void +nv50_unmap_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); +} + void nv50_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count, @@ -308,7 +419,8 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe, unsigned i, nz_divisors; unsigned step[16], pos[16]; - nv50_upload_user_vbufs(nv50); + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); nv50_state_validate(nv50); @@ -320,9 +432,14 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); @@ -332,12 +449,18 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode) | (1 << 28)); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } + nv50_unmap_vbufs(nv50); so_ref(NULL, &nv50->state.instbuf); } @@ -372,6 +495,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + /* XXX: not sure what to do if ret != TRUE: flush and retry? */ assert(ret); @@ -400,7 +525,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -433,7 +558,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -458,7 +583,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -502,7 +627,8 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - nv50_upload_user_vbufs(nv50); + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); nv50_state_validate(nv50); @@ -513,7 +639,7 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, OUT_RING (chan, startInstance); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + OUT_RING (chan, nv50_prim(mode)); nv50_draw_elements_inline(nv50, map, indexSize, start, count); @@ -532,6 +658,7 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } + nv50_unmap_vbufs(nv50); so_ref(NULL, &nv50->state.instbuf); } @@ -564,6 +691,8 @@ nv50_draw_elements(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + pipe_buffer_unmap(pscreen, indexBuffer); } @@ -644,7 +773,7 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; - if (nv50->vertprog->cfg.edgeflag_in < 16) + if (NV50_USING_LOATHED_EDGEFLAG(nv50)) nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); @@ -672,14 +801,16 @@ nv50_vbo_validate(struct nv50_context *nv50) nv50->vbo_fifo &= ~(1 << i); continue; } - so_data(vtxfmt, hw | i); if (nv50->vbo_fifo) { + so_data (vtxfmt, hw | + (ve->instance_divisor ? (1 << 4) : i)); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); continue; } + so_data(vtxfmt, hw | i); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); so_data (vtxbuf, 0x20000000 | @@ -721,7 +852,7 @@ typedef void (*pfn_push)(struct nouveau_channel *, void *); struct nv50_vbo_emitctx { pfn_push push[16]; - void *map[16]; + uint8_t *map[16]; unsigned stride[16]; unsigned nr_ve; unsigned vtx_dwords; @@ -759,19 +890,18 @@ nv50_map_vbufs(struct nv50_context *nv50) for (i = 0; i < nv50->vtxbuf_nr; ++i) { struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - unsigned size, delta; + unsigned size = vb->stride * (vb->max_index + 1) + 16; if (nouveau_bo(vb->buffer)->map) continue; - size = vb->stride * (vb->max_index + 1); - delta = vb->buffer_offset; - + size = vb->stride * (vb->max_index + 1) + 16; + size = MIN2(size, vb->buffer->size); if (!size) - size = vb->buffer->size - vb->buffer_offset; + size = vb->buffer->size; if (nouveau_bo_map_range(nouveau_bo(vb->buffer), - delta, size, NOUVEAU_BO_RD)) + 0, size, NOUVEAU_BO_RD)) break; } @@ -782,16 +912,6 @@ nv50_map_vbufs(struct nv50_context *nv50) return FALSE; } -static INLINE void -nv50_unmap_vbufs(struct nv50_context *nv50) -{ - unsigned i; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) - if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) - nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); -} - static void emit_b32_1(struct nouveau_channel *chan, void *data) { @@ -886,12 +1006,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, ve = &nv50->vtxelt[i]; vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - if (!(nv50->vbo_fifo & (1 << i))) + if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor) continue; n = emit->nr_ve++; emit->stride[n] = vb->stride; - emit->map[n] = nouveau_bo(vb->buffer)->map + + emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map + + vb->buffer_offset + (start * vb->stride + ve->src_offset); desc = util_format_description(ve->src_format); @@ -981,13 +1102,12 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -1008,13 +1128,12 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -1035,13 +1154,12 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -1062,13 +1180,12 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } -- 2.30.2