From 0842829c1813d47e6fdf3de09167d243c029cc90 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 8 Mar 2010 13:27:46 +1000 Subject: [PATCH] nv50: when doing inline indices, split elt lists so they fit into pushbuf --- src/gallium/drivers/nouveau/nouveau_util.h | 100 +++++++++++ src/gallium/drivers/nv50/nv50_vbo.c | 187 +++++++++++---------- 2 files changed, 202 insertions(+), 85 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index a10114beab9..7f16e31c3f0 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -88,4 +88,104 @@ static INLINE unsigned log2i(unsigned i) return r; } +struct u_split_prim { + void *priv; + void (*emit)(void *priv, unsigned start, unsigned count); + void (*edge)(void *priv, boolean enabled); + + unsigned mode; + unsigned start; + unsigned p_start; + unsigned p_end; + + int repeat_first:1; + int close_first:1; + int edgeflag_off:1; +}; + +static inline void +u_split_prim_init(struct u_split_prim *s, + unsigned mode, unsigned start, unsigned count) +{ + if (mode == PIPE_PRIM_LINE_LOOP) { + s->mode = PIPE_PRIM_LINE_STRIP; + s->close_first = 1; + } else { + s->mode = mode; + s->close_first = 0; + } + s->start = start; + s->p_start = start; + s->p_end = start + count; + s->edgeflag_off = 0; + s->repeat_first = 0; +} + +static INLINE boolean +u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +{ + int repeat = 0; + + if (s->repeat_first) { + s->emit(s->priv, s->start, 1); + max_verts--; + if (s->edgeflag_off) { + s->edge(s->priv, TRUE); + s->edgeflag_off = FALSE; + } + } + + if (s->p_start + s->close_first + max_verts >= s->p_end) { + s->emit(s->priv, s->p_start, s->p_end - s->p_start); + if (s->close_first) + s->emit(s->priv, s->start, 1); + return TRUE; + } + + switch (s->mode) { + case PIPE_PRIM_LINES: + max_verts &= ~1; + break; + case PIPE_PRIM_LINE_STRIP: + repeat = 1; + break; + case PIPE_PRIM_POLYGON: + max_verts--; + s->emit(s->priv, s->p_start, max_verts); + s->edge(s->priv, FALSE); + s->emit(s->priv, s->p_start + max_verts, 1); + s->p_start += max_verts; + s->repeat_first = TRUE; + s->edgeflag_off = TRUE; + return FALSE; + case PIPE_PRIM_TRIANGLES: + max_verts = max_verts - (max_verts % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + /* to ensure winding stays correct, always split + * on an even number of generated triangles + */ + max_verts = max_verts & ~1; + repeat = 2; + break; + case PIPE_PRIM_TRIANGLE_FAN: + s->repeat_first = TRUE; + repeat = 1; + break; + case PIPE_PRIM_QUADS: + max_verts &= ~3; + break; + case PIPE_PRIM_QUAD_STRIP: + max_verts &= ~1; + repeat = 2; + break; + default: + break; + } + + s->emit (s->priv, s->p_start, max_verts); + s->p_start += (max_verts - repeat); + return FALSE; +} + #endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 54a2fa730a5..8f3c1aaf465 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -25,6 +25,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" +#include "nouveau/nouveau_util.h" #include "nv50_context.h" #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) @@ -251,85 +252,83 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1); } -static INLINE boolean -nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, - unsigned start, unsigned count) -{ - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; +struct inline_ctx { + struct nv50_context *nv50; + void *map; +}; - map += start; +static void +inline_elt08(void *priv, unsigned start, unsigned count) +{ + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + uint8_t *map = (uint8_t *)ctx->map + start; if (count & 1) { BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; - count--; + count &= ~1; } - while (count) { - unsigned nr = count > 2046 ? 2046 : count; - int i; - - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); - for (i = 0; i < nr; i += 2) - OUT_RING (chan, (map[i + 1] << 16) | map[i]); + count >>= 1; + if (!count) + return; - count -= nr; - map += nr; + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); + while (count--) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; } - return TRUE; } -static INLINE boolean -nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, - unsigned start, unsigned count) +static void +inline_elt16(void *priv, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - - map += start; + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + uint16_t *map = (uint16_t *)ctx->map + start; if (count & 1) { BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); + count &= ~1; map++; - count--; } - while (count) { - unsigned nr = count > 2046 ? 2046 : count; - int i; - - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); - for (i = 0; i < nr; i += 2) - OUT_RING (chan, (map[i + 1] << 16) | map[i]); + count >>= 1; + if (!count) + return; - count -= nr; - map += nr; + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); + while (count--) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; } - return TRUE; } -static INLINE boolean -nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, - unsigned start, unsigned count) +static void +inline_elt32(void *priv, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; - map += start; - - while (count) { - unsigned nr = count > 2047 ? 2047 : count; + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count); + OUT_RINGp (chan, (uint32_t *)ctx->map + start, count); +} - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); - OUT_RINGp (chan, map, nr); +static void +inline_edgeflag(void *priv, boolean enabled) +{ + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; - count -= nr; - map += nr; - } - return TRUE; + BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, enabled ? 1 : 0); } static void @@ -343,51 +342,68 @@ nv50_draw_elements_inline(struct pipe_context *pipe, struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; struct instance a[16]; - unsigned prim = nv50_prim(mode); - void *map; - - map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - assert(map); - if (!map) + struct inline_ctx ctx; + struct u_split_prim s; + boolean nzi = FALSE; + unsigned overhead; + + overhead = 16*3; /* potential instance adjustments */ + overhead += 4; /* Begin()/End() */ + overhead += 4; /* potential edgeflag disable/reenable */ + overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */ + + s.priv = &ctx; + if (indexSize == 1) + s.emit = inline_elt08; + else + if (indexSize == 2) + s.emit = inline_elt16; + else + s.emit = inline_elt32; + s.edge = inline_edgeflag; + + ctx.nv50 = nv50; + ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + assert(ctx.map); + if (!ctx.map) return; instance_init(nv50, a, startInstance); - if (!nv50_state_validate(nv50, 0)) + if (!nv50_state_validate(nv50, overhead + 6 + 3)) return; BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); OUT_RING (chan, NV50_CB_AUX | (24 << 8)); OUT_RING (chan, startInstance); while (instanceCount--) { - if (AVAIL_RING(chan) < (7 + 16*3)) { - FIRE_RING(chan); - if (!nv50_state_validate(nv50, 0)) { - assert(0); - return; + unsigned max_verts; + boolean done; + + u_split_prim_init(&s, mode, start, count); + do { + if (AVAIL_RING(chan) < (overhead + 6)) { + FIRE_RING(chan); + if (!nv50_state_validate(nv50, (overhead + 6))) { + assert(0); + return; + } } - } - instance_step(nv50, a); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, prim); - switch (indexSize) { - case 1: - nv50_draw_elements_inline_u08(nv50, map, start, count); - break; - case 2: - nv50_draw_elements_inline_u16(nv50, map, start, count); - break; - case 4: - nv50_draw_elements_inline_u32(nv50, map, start, count); - break; - default: - assert(0); - break; - } - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); + max_verts = AVAIL_RING(chan) - overhead; + if (max_verts > 2047) + max_verts = 2047; + if (indexSize != 4) + max_verts <<= 1; + instance_step(nv50, a); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0)); + done = u_split_prim_next(&s, max_verts); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } while (!done); - prim |= (1 << 28); + nzi = TRUE; } pipe_buffer_unmap(pscreen, indexBuffer); @@ -406,7 +422,8 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, struct instance a[16]; unsigned prim = nv50_prim(mode); - if (indexSize == 1) { + if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || + indexSize == 1) { nv50_draw_elements_inline(pipe, indexBuffer, indexSize, mode, start, count, startInstance, instanceCount); -- 2.30.2