From 7b6881932a71b36dd47f63200c9dbee8e2b9af4f Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 12 Jan 2012 19:12:02 +0100 Subject: [PATCH] nvc0: fix submission of VertexID and EdgeFlag in push mode NOTE: This is a candidate for the 8.0 branch. --- src/gallium/drivers/nv50/codegen/nv50_ir.cpp | 1 + .../drivers/nv50/codegen/nv50_ir_driver.h | 1 + .../nv50/codegen/nv50_ir_from_tgsi.cpp | 7 ++ src/gallium/drivers/nvc0/nvc0_3d.xml.h | 5 ++ src/gallium/drivers/nvc0/nvc0_context.h | 3 - src/gallium/drivers/nvc0/nvc0_program.c | 8 +- src/gallium/drivers/nvc0/nvc0_program.h | 3 +- src/gallium/drivers/nvc0/nvc0_push.c | 74 +++++++++++++++---- src/gallium/drivers/nvc0/nvc0_vbo.c | 3 +- 9 files changed, 84 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp index a8f05290df2..7d9b7e2b760 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp @@ -938,6 +938,7 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) } info->io.clipDistance = 0xff; info->io.pointSize = 0xff; + info->io.vertexId = 0xff; info->io.edgeFlagIn = 0xff; info->io.edgeFlagOut = 0xff; info->io.fragDepth = 0xff; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index 73fb023c378..a37a29a99a6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -155,6 +155,7 @@ struct nv50_ir_prog_info uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ int8_t genUserClip; /* request user clip planes for ClipVertex */ uint8_t pointSize; /* output index for PointSize */ + uint8_t vertexId; /* system value index of VertexID */ uint8_t edgeFlagIn; uint8_t edgeFlagOut; uint8_t fragDepth; /* output index of FragDepth */ diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index a1c8f84d7ed..c104dbe9670 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -864,6 +864,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) } break; case TGSI_FILE_SYSTEM_VALUE: + switch (sn) { + case TGSI_SEMANTIC_VERTEXID: + info->io.vertexId = first; + break; + default: + break; + } for (i = first; i <= last; ++i, ++si) { info->sv[i].sn = sn; info->sv[i].si = si; diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 8d6ea7e2745..6a1dff70f76 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -913,6 +913,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000 #define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000 +#define NVC0_3D_VERTEX_ID_REPLACE 0x0000161c +#define NVC0_3D_VERTEX_ID_REPLACE_ENABLE 0x00000001 +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__MASK 0x00000ff0 +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT 4 + #define NVC0_3D_VERTEX_DATA 0x00001640 #define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644 diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index af95d1ab6aa..b8f0d92b6b4 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -134,9 +134,6 @@ struct nvc0_context { struct draw_context *draw; }; -#define NVC0_USING_EDGEFLAG(ctx) \ - ((ctx)->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS) - static INLINE struct nvc0_context * nvc0_context(struct pipe_context *pipe) { diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 813008cd537..cff76fe67f3 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -107,7 +107,7 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) for (n = 0, i = 0; i < info->numInputs; ++i) { switch (info->in[i].sn) { - case TGSI_SEMANTIC_INSTANCEID: + case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ case TGSI_SEMANTIC_VERTEXID: info->in[i].mask = 0x1; info->in[i].slot[0] = @@ -580,7 +580,11 @@ nvc0_program_translate(struct nvc0_program *prog) prog->relocs = info->bin.relocData; prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1)); - prog->vp.edgeflag = PIPE_MAX_ATTRIBS; + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + + if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) + info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ + prog->vp.edgeflag = info->io.edgeFlagIn; switch (prog->type) { case PIPE_SHADER_VERTEX: diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index c384ef534d5..6eb8c96d60a 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -37,8 +37,9 @@ struct nvc0_program { struct { uint32_t clip_mode; /* clip/cull selection */ uint8_t clip_enable; /* mask of defined clip planes */ - uint8_t edgeflag; uint8_t num_ucps; /* also set to max if ClipDistance is used */ + uint8_t edgeflag; /* attribute index of edgeflag input */ + boolean need_vertex_id; } vp; struct { uint8_t early_z; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 238671d721c..412186c2345 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -21,6 +21,7 @@ struct push_context { struct translate *translate; boolean primitive_restart; + boolean need_vertex_id; uint32_t prim; uint32_t restart_index; uint32_t instance_id; @@ -42,22 +43,23 @@ init_push_context(struct nvc0_context *nvc0, struct push_context *ctx) ctx->chan = nvc0->screen->base.channel; ctx->translate = nvc0->vertex->translate; + if (likely(nvc0->vertex->num_elements < 32)) + ctx->need_vertex_id = nvc0->vertprog->vp.need_vertex_id; + else + ctx->need_vertex_id = FALSE; + + ctx->edgeflag.buffer = -1; ctx->edgeflag.value = 0.5f; - if (NVC0_USING_EDGEFLAG(nvc0)) { + if (unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe; - ctx->edgeflag.buffer = ve->vertex_buffer_index; ctx->edgeflag.offset = ve->src_offset; - ctx->packet_vertex_limit = 1; } else { - ctx->edgeflag.buffer = -1; - ctx->edgeflag.offset = 0; - ctx->edgeflag.stride = 0; - ctx->edgeflag.data = NULL; - ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; + if (unlikely(ctx->need_vertex_id)) + ctx->packet_vertex_limit = 1; } ctx->vertex_words = nvc0->vertex->vtx_size; @@ -74,6 +76,17 @@ set_edgeflag(struct push_context *ctx, unsigned vtx_id) } } +static INLINE void +set_vertexid(struct push_context *ctx, uint32_t vtx_id) +{ +#if 0 + BEGIN_RING(ctx->chan, RING_3D(VERTEX_ID), 1); /* broken on nvc0 */ +#else + BEGIN_RING(ctx->chan, RING_3D(VERTEX_DATA), 1); /* as last attribute */ +#endif + OUT_RING (ctx->chan, vtx_id); +} + static INLINE unsigned prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) { @@ -117,7 +130,7 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) if (ctx->primitive_restart) nr = prim_restart_search_i08(elts, push, ctx->restart_index); - if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) set_edgeflag(ctx, elts[0]); size = ctx->vertex_words * nr; @@ -126,8 +139,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id, ctx->chan->cur); - ctx->chan->cur += size; + + if (unlikely(ctx->need_vertex_id) && likely(size)) + set_vertexid(ctx, elts[0]); + count -= nr; elts += nr; @@ -155,7 +171,7 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) if (ctx->primitive_restart) nr = prim_restart_search_i16(elts, push, ctx->restart_index); - if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) set_edgeflag(ctx, elts[0]); size = ctx->vertex_words * nr; @@ -164,8 +180,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id, ctx->chan->cur); - ctx->chan->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, elts[0]); + count -= nr; elts += nr; @@ -193,7 +212,7 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) if (ctx->primitive_restart) nr = prim_restart_search_i32(elts, push, ctx->restart_index); - if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) set_edgeflag(ctx, elts[0]); size = ctx->vertex_words * nr; @@ -202,8 +221,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id, ctx->chan->cur); - ctx->chan->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, elts[0]); + count -= nr; elts += nr; @@ -233,6 +255,10 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) ctx->translate->run(ctx->translate, start, push, ctx->instance_id, ctx->chan->cur); ctx->chan->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, start); + count -= push; start += push; } @@ -326,6 +352,16 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) ctx.instance_id = info->start_instance; ctx.prim = nvc0_prim_gl(info->mode); + if (unlikely(ctx.need_vertex_id)) { + const unsigned a = nvc0->vertex->num_elements; + BEGIN_RING(ctx.chan, RING_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + OUT_RING (ctx.chan, (a << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + BEGIN_RING(ctx.chan, RING_3D(VERTEX_ID_REPLACE), 1); + OUT_RING (ctx.chan, (((0x80 + a * 0x10) / 4) << 4) | 1); + } + while (inst_count--) { BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); OUT_RING (ctx.chan, ctx.prim); @@ -355,6 +391,16 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) if (unlikely(ctx.edgeflag.value == 0.0f)) IMMED_RING(ctx.chan, RING_3D(EDGEFLAG_ENABLE), 1); + if (unlikely(ctx.need_vertex_id)) { + const unsigned a = nvc0->vertex->num_elements; + IMMED_RING(ctx.chan, RING_3D(VERTEX_ID_REPLACE), 0); + BEGIN_RING(ctx.chan, RING_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + OUT_RING (ctx.chan, + NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + } + if (info->indexed) nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 7cf69910e6a..3e95d509f99 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -263,7 +263,8 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; unsigned i; - if (unlikely(vertex->need_conversion || NVC0_USING_EDGEFLAG(nvc0))) { + if (unlikely(vertex->need_conversion) || + unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { nvc0->vbo_fifo = ~0; nvc0->vbo_user = 0; } else { -- 2.30.2