From: Christoph Bumiller Date: Sun, 16 Jan 2011 13:10:46 +0000 (+0100) Subject: nvc0: fix and enable instanced drawing and arrays X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a4742c6a07179f01eebfc486e6cd21be05d9c8ae;p=mesa.git nvc0: fix and enable instanced drawing and arrays --- diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 31302949d5e..61932ff2b6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -843,7 +843,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c #define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d #define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e -#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000 #define NVC0_3D_VERTEX_DATA 0x00001640 diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index eeb5beff7a7..94117988e50 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -71,7 +71,7 @@ struct nvc0_context { uint32_t dirty; struct { - uint32_t instance_bits; + uint32_t instance_elts; /* bitmask of per-instance elements */ uint32_t instance_base; int32_t index_bias; boolean prim_restart; diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 57a0874e679..aefaf7b98ad 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -132,15 +132,17 @@ nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) } static INLINE unsigned -nvc0_system_value_location(unsigned sn, unsigned si) +nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input) { /* NOTE: locations 0xfxx indicate special regs */ switch (sn) { /* case TGSI_SEMANTIC_VERTEXID: + *is_input = TRUE; return 0x2fc; */ case TGSI_SEMANTIC_PRIMID: + *is_input = TRUE; return 0x60; /* case TGSI_SEMANTIC_LAYER_INDEX: @@ -149,8 +151,10 @@ nvc0_system_value_location(unsigned sn, unsigned si) return 0x68; */ case TGSI_SEMANTIC_INSTANCEID: + *is_input = TRUE; return 0x2f8; case TGSI_SEMANTIC_FACE: + *is_input = TRUE; return 0x3fc; /* case TGSI_SEMANTIC_INVOCATIONID: @@ -281,7 +285,7 @@ prog_decl(struct nvc0_translation_info *ti, } break; case TGSI_FILE_SYSTEM_VALUE: - ti->sysval_loc[i] = nvc0_system_value_location(sn, si); + ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); assert(first == last); break; case TGSI_FILE_NULL: @@ -414,6 +418,12 @@ nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) } } + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { + a = ti->sysval_loc[i] / 4; + if (a > 0 && a < (0xf00 / 4)) + vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32); + } + return 0; } @@ -520,6 +530,12 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) fp->hdr[18] |= 0xf << ti->output_loc[i][0]; } + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { + a = ti->sysval_loc[i] / 2; + if ((a > 0) && (a < 0xf00 / 2)) + fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32); + } + return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index 2e84caecc9e..e6b210d1355 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -63,6 +63,7 @@ struct nvc0_translation_info { uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4]; uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; uint16_t sysval_loc[TGSI_SEMANTIC_COUNT]; + boolean sysval_in[TGSI_SEMANTIC_COUNT]; int input_access[PIPE_MAX_SHADER_INPUTS][4]; int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 941be678586..74c3451c19a 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -26,6 +26,7 @@ struct push_context { boolean primitive_restart; uint32_t prim; uint32_t restart_index; + uint32_t instance_id; }; static INLINE unsigned @@ -75,7 +76,8 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->chan->cur); + ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); ctx->chan->cur += size; count -= nr; @@ -86,7 +88,8 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) elts++; BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); OUT_RING (ctx->chan, 0); - OUT_RING (ctx->chan, ctx->prim); + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); } } } @@ -108,7 +111,8 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->chan->cur); + ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); ctx->chan->cur += size; count -= nr; @@ -119,7 +123,8 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) elts++; BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); OUT_RING (ctx->chan, 0); - OUT_RING (ctx->chan, ctx->prim); + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); } } } @@ -141,7 +146,8 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->chan->cur); + ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); ctx->chan->cur += size; count -= nr; @@ -152,7 +158,8 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) elts++; BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); OUT_RING (ctx->chan, 0); - OUT_RING (ctx->chan, ctx->prim); + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); } } } @@ -166,7 +173,8 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); + ctx->translate->run(ctx->translate, start, push, ctx->instance_id, + ctx->chan->cur); ctx->chan->cur += size; count -= push; start += push; @@ -244,6 +252,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) ctx.restart_index = 0; } + ctx.instance_id = info->start_instance; ctx.prim = nvc0_prim_gl(info->mode); while (inst--) { @@ -268,6 +277,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) } IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0); + ctx.instance_id++; ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 54eec660b2a..f608b32e1cb 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -110,6 +110,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_INSTANCED_DRAWING: return 1; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index ee788c5bb9c..6c8028aba13 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -65,7 +65,8 @@ struct nvc0_vertex_element { struct nvc0_vertex_stateobj { struct translate *translate; unsigned num_elements; - uint32_t instance_bits; + uint32_t instance_elts; + uint32_t instance_bufs; unsigned vtx_size; unsigned vtx_per_packet_max; struct nvc0_vertex_element element[1]; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index fecfc76fb79..950bee2eda4 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1085,6 +1085,20 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_PREDICATE: res = bld_fetch_global(bld, &bld->pvs[idx][swz]); break; + case TGSI_FILE_SYSTEM_VALUE: + assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */ + res = new_value(bld->pc, + bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4); + res->reg.address = bld->ti->sysval_loc[idx]; + + if (res->reg.file == NV_FILE_MEM_A) + res = bld_insn_1(bld, NV_OP_VFETCH, res); + else + res = bld_interp(bld, NVC0_INTERP_FLAT, res); + + /* mesa doesn't do real integers yet :-(and in GL this should be S32) */ + res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res); + break; default: NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); abort(); diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index a14e9557382..a51a887ed89 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -58,7 +58,8 @@ nvc0_vertex_state_create(struct pipe_context *pipe, if (!so) return NULL; so->num_elements = num_elements; - so->instance_bits = 0; + so->instance_elts = 0; + so->instance_bufs = 0; transkey.nr_elements = 0; transkey.output_stride = 0; @@ -85,7 +86,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, } so->element[i].state |= i; - if (likely(!ve->instance_divisor)) { + if (1) { unsigned j = transkey.nr_elements++; transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; @@ -97,8 +98,11 @@ nvc0_vertex_state_create(struct pipe_context *pipe, transkey.element[j].output_format = fmt; transkey.element[j].output_offset = transkey.output_stride; transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; - } else { - so->instance_bits |= 1 << i; + + if (unlikely(ve->instance_divisor)) { + so->instance_elts |= 1 << i; + so->instance_bufs |= 1 << vbi; + } } } @@ -141,6 +145,22 @@ nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb, OUT_RINGf(chan, v[i]); } +static INLINE void +nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, + uint32_t *base, uint32_t *size) +{ + if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { + /* TODO: use min and max instance divisor to get a proper range */ + *base = 0; + *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride; + } else { + assert(nvc0->vbo_max_index != ~0); + *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; + *size = (nvc0->vbo_max_index - + nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride; + } +} + static void nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) { @@ -165,9 +185,7 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) if (buf->status & NVC0_BUFFER_STATUS_USER_MEMORY) { nvc0->vbo_user |= 1 << i; assert(vb->stride > vb->buffer_offset); - size = vb->stride * (nvc0->vbo_max_index - - nvc0->vbo_min_index + 1); - base = vb->stride * nvc0->vbo_min_index; + nvc0_vbuf_range(nvc0, i, &base, &size); nvc0_user_buffer_upload(buf, base, size); } else { nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART); @@ -184,7 +202,6 @@ static void nvc0_update_user_vbufs(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; - const uint32_t vertex_count = nvc0->vbo_max_index - nvc0->vbo_min_index + 1; uint32_t base, offset, size; int i; uint32_t written = 0; @@ -202,8 +219,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) nvc0_emit_vtxattr(nvc0, vb, ve, i); continue; } - size = vb->stride * vertex_count; - base = vb->stride * nvc0->vbo_min_index; + nvc0_vbuf_range(nvc0, b, &base, &size); if (!(written & (1 << b))) { written |= 1 << b; @@ -253,13 +269,13 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; if (unlikely(ve->pipe.instance_divisor)) { - if (!(nvc0->state.instance_bits & (1 << i))) { + if (!(nvc0->state.instance_elts & (1 << i))) { IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); } BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1); OUT_RING (chan, ve->pipe.instance_divisor); } else - if (unlikely(nvc0->state.instance_bits & (1 << i))) { + if (unlikely(nvc0->state.instance_elts & (1 << i))) { IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); } @@ -293,7 +309,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) } nvc0->state.num_vtxelts = vertex->num_elements; - nvc0->state.instance_bits = vertex->instance_bits; + nvc0->state.instance_elts = vertex->instance_elts; } #define NVC0_PRIM_GL_CASE(n) \ @@ -600,17 +616,18 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0_state_validate(nvc0); + if (nvc0->vbo_fifo) { + nvc0_push_vbo(nvc0, info); + return; + } + if (nvc0->state.instance_base != info->start_instance) { nvc0->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? */ BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1); OUT_RING (chan, info->start_instance); } - if (nvc0->vbo_fifo) { - nvc0_push_vbo(nvc0, info); - return; - } - if (nvc0->vbo_dirty) { BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1); OUT_RING (chan, 0);