From 0cfc1304bece9cbc17a39b009956b2830c6fd2f5 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 19 Nov 2015 09:51:03 +0100 Subject: [PATCH] nv50: allow using inline vertex data submit when gl_VertexID is used The hardware can actually generates vertexid when vertices come from a client-side buffer like when glDrawElements is used. This doesn't fix (or break) any piglit tests but it improves the previous attempt of Ilia (c830d19 "nv50: avoid using inline vertex data submit when gl_VertexID is used") The only disadvantage is that only works on G84+, but we don't really care of that weird and old NV50 chipset. Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- .../drivers/nouveau/nv50/nv50_program.c | 3 +- .../drivers/nouveau/nv50/nv50_program.h | 2 +- src/gallium/drivers/nouveau/nv50/nv50_push.c | 42 ++++++++++++++++++- .../nouveau/nv50/nv50_state_validate.c | 3 +- src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 11 +---- 5 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 48057d20f4e..a4b8ddfda95 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; - prog->vp.vertexid = 1; continue; default: break; @@ -383,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index f0016707163..1de5122a56e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,9 +76,9 @@ struct nv50_program { ubyte psiz; /* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; - ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; + bool need_vertex_id; } vp; struct { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c index f31eaa0e314..cbef95d07f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c @@ -24,6 +24,10 @@ struct push_context { struct translate *translate; bool primitive_restart; + + bool need_vertex_id; + int32_t index_bias; + uint32_t prim; uint32_t restart_index; uint32_t instance_id; @@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) static void emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) { + uint32_t elts = 0; + while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); unsigned size = ctx->vertex_words * push; + if (unlikely(ctx->need_vertex_id)) { + /* For non-indexed draws, gl_VertexID goes up after each vertex. */ + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, elts++); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, @@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.push = nv50->base.pushbuf; ctx.translate = nv50->vertex->translate; - ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit; + + ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS && + nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32); + ctx.index_bias = info->index_bias; + + /* For indexed draws, gl_VertexID must be emitted for every vertex. */ + ctx.packet_vertex_limit = + ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit; ctx.vertex_words = nv50->vertex->vertex_size; assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); @@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.instance_id++; ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } + + if (unlikely(ctx.need_vertex_id)) { + /* Reset gl_VertexID to prevent future indexed draws to be confused. */ + BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx.push, nv50->state.index_bias); + } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b6181edf24f..02a759c23ad 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,8 +503,7 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | - NV50_NEW_VERTPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index ac0c4d99dbb..85878d5fcc7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) uint64_t addrs[PIPE_MAX_ATTRIBS]; uint32_t limits[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_vertex_stateobj dummy = {}; - struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy; + struct nv50_vertex_stateobj *vertex = nv50->vertex; struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; uint32_t mask; @@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); - /* A vertexid is not generated for inline data uploads. Have to use a - * VBO. This check must come after the vertprog has been validated, - * otherwise vertexid may be unset. - */ - assert(nv50->vertprog->translated); - if (nv50->vertprog->vp.vertexid) - nv50->vbo_push_hint = 0; - if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; else -- 2.30.2