From f6b2e8af7425c67f8def9dfba92f6f0ad9585b40 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 1 Oct 2014 23:13:22 -0400 Subject: [PATCH] freedreno/a3xx: add support for vertexid and instanceid sysvals Signed-off-by: Ilia Mirkin --- docs/relnotes/10.6.0.html | 1 + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 46 ++++++++--- .../drivers/freedreno/freedreno_screen.c | 8 +- .../drivers/freedreno/ir3/ir3_compiler.c | 80 ++++++++++++++++++- .../drivers/freedreno/ir3/ir3_shader.h | 1 + 5 files changed, 120 insertions(+), 16 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index d201a65db39..bedbd4bab9e 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers. diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 1c17e2ddde0..ad5fcb35cf5 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -351,21 +351,31 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) { - uint32_t i, j, last = 0; + int32_t i, j, last = -1; uint32_t total_in = 0; const struct fd_vertex_state *vtx = emit->vtx; struct ir3_shader_variant *vp = fd3_emit_get_vp(emit); - unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count); + unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0); + + for (i = 0; i < vp->inputs_count; i++) { + uint8_t semantic = sem2name(vp->inputs[i].semantic); + if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE) + vertex_regid = vp->inputs[i].regid; + else if (semantic == TGSI_SEMANTIC_INSTANCEID) + instance_regid = vp->inputs[i].regid; + else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) + last = i; + } /* hw doesn't like to be configured for zero vbo's, it seems: */ - if (vtx->vtx->num_elements == 0) + if (vtx->vtx->num_elements == 0 && + vertex_regid == regid(63, 0) && + instance_regid == regid(63, 0)) return; - for (i = 0; i < n; i++) - if (vp->inputs[i].compmask) - last = i; - for (i = 0, j = 0; i <= last; i++) { + uint8_t semantic = sem2name(vp->inputs[i].semantic); + assert(semantic == 0); if (vp->inputs[i].compmask) { struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = @@ -373,7 +383,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) struct fd_resource *rsc = fd_resource(vb->buffer); enum pipe_format pfmt = elem->src_format; enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt); - bool switchnext = (i != last); + bool switchnext = (i != last) || + vertex_regid != regid(63, 0) || + instance_regid != regid(63, 0); bool isint = util_format_is_pure_integer(pfmt); uint32_t fs = util_format_get_blocksize(pfmt); @@ -409,8 +421,8 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) | A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j)); OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX - A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) | - A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0))); + A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | + A3XX_VFD_CONTROL_1_REGID4INST(instance_regid)); } void @@ -580,6 +592,20 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } } + /* emit driver params every time */ + if (emit->info && emit->prog == &ctx->prog) { + uint32_t vertex_params[4] = { + emit->info->indexed ? emit->info->index_bias : emit->info->start, + 0, + 0, + 0 + }; + if (vp->constlen > vp->first_driver_param) { + fd3_emit_constant(ring, SB_VERT_SHADER, vp->first_driver_param * 4, + 0, 4, vertex_params, NULL); + } + } + if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) { struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend); uint32_t i; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1ce96d3ba95..7952c04d35c 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -160,7 +160,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -172,6 +171,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + case PIPE_CAP_VERTEXID_NOBASE: return 1; case PIPE_CAP_SHADER_STENCIL_EXPORT: @@ -186,7 +186,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_COMPUTE: return 0; @@ -195,6 +195,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_TGSI_INSTANCEID: + return is_a3xx(screen) && glsl130; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; @@ -228,7 +231,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: return 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 8c88bf7db47..3ee9642efcc 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -83,6 +83,9 @@ struct ir3_compile_context { */ struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4]; + /* For vertex shaders, keep track of the system values sources */ + struct ir3_instruction *vertex_id, *basevertex, *instance_id; + struct tgsi_parse_context parser; unsigned type; @@ -105,6 +108,9 @@ struct ir3_compile_context { unsigned num_internal_temps; struct tgsi_src_register internal_temps[8]; + /* for looking up which system value is which */ + unsigned sysval_semantics[8]; + /* idx/slot for last compiler generated immediate */ unsigned immediate_idx; @@ -222,6 +228,8 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, ctx->atomic = false; ctx->frag_pos = NULL; ctx->frag_face = NULL; + ctx->vertex_id = NULL; + ctx->instance_id = NULL; ctx->tmp_src = NULL; ctx->using_tmp_dst = false; @@ -239,7 +247,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, * the assembler what the max addr reg value can be: */ if (info->indirect_files & FM(CONSTANT)) - so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1); + so->constlen = ctx->info.file_max[TGSI_FILE_CONSTANT] + 1; i = 0; i += setup_arrays(ctx, TGSI_FILE_INPUT, i); @@ -248,7 +256,12 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, /* any others? we don't track arrays for const..*/ /* Immediates go after constants: */ - so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1; + if (so->type == SHADER_VERTEX) { + so->first_driver_param = info->file_max[TGSI_FILE_CONSTANT] + 1; + so->first_immediate = so->first_driver_param + 1; + } else { + so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1; + } ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); ret = tgsi_parse_init(&ctx->parser, ctx->tokens); @@ -355,7 +368,7 @@ push_block(struct ir3_compile_context *ctx) ntmp += 8 * 4; nout = SCALAR_REGS(OUTPUT); - nin = SCALAR_REGS(INPUT); + nin = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE); /* for outermost block, 'inputs' are the actual shader INPUT * register file. Reads from INPUT registers always go back to @@ -555,6 +568,19 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, unsigned n) block->temporaries[n] = instr; } break; + case TGSI_FILE_SYSTEM_VALUE: + switch (ctx->sysval_semantics[n >> 2]) { + case TGSI_SEMANTIC_VERTEXID_NOBASE: + instr = ctx->vertex_id; + break; + case TGSI_SEMANTIC_BASEVERTEX: + instr = ctx->basevertex; + break; + case TGSI_SEMANTIC_INSTANCEID: + instr = ctx->instance_id; + break; + } + break; } return instr; @@ -735,6 +761,7 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx, */ case TGSI_FILE_INPUT: case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SYSTEM_VALUE: /* uses SSA */ break; default: @@ -2934,6 +2961,51 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) } } +static void +decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned r = regid(so->inputs_count, 0); + unsigned n = so->inputs_count++; + + DBG("decl sv -> r%d", n); + + compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); + compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics)); + + ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name; + so->inputs[n].semantic = decl_semantic(&decl->Semantic); + so->inputs[n].compmask = 1; + so->inputs[n].regid = r; + so->inputs[n].inloc = ctx->next_inloc; + so->inputs[n].interpolate = false; + + struct ir3_instruction *instr = NULL; + + switch (decl->Semantic.Name) { + case TGSI_SEMANTIC_VERTEXID_NOBASE: + ctx->vertex_id = instr = create_input(ctx->block, NULL, r); + break; + case TGSI_SEMANTIC_BASEVERTEX: + ctx->basevertex = instr = instr_create(ctx, 1, 0); + instr->cat1.src_type = get_stype(ctx); + instr->cat1.dst_type = get_stype(ctx); + ir3_reg_create(instr, 0, 0); + ir3_reg_create(instr, regid(so->first_driver_param, 0), IR3_REG_CONST); + break; + case TGSI_SEMANTIC_INSTANCEID: + ctx->instance_id = instr = create_input(ctx->block, NULL, r); + break; + default: + compile_error(ctx, "Unknown semantic: %s\n", + tgsi_semantic_names[decl->Semantic.Name]); + } + + ctx->block->inputs[r] = instr; + ctx->next_inloc++; + so->total_in++; +} + static void decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) { @@ -3099,6 +3171,8 @@ compile_instructions(struct ir3_compile_context *ctx) decl_out(ctx, decl); } else if (file == TGSI_FILE_INPUT) { decl_in(ctx, decl); + } else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + decl_sv(ctx, decl); } if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 5207185b220..e5d57af1ea6 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -182,6 +182,7 @@ struct ir3_shader_variant { * (not regid, because TGSI thinks in terms of vec4 registers, * not scalar registers) */ + unsigned first_driver_param; unsigned first_immediate; unsigned immediates_count; struct { -- 2.30.2