From: Kristian H. Kristensen Date: Wed, 27 Mar 2019 22:31:49 +0000 (-0700) Subject: freedreno/ir3: Add workaround for VS samgq X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=107a8ec3b331f0b9bbaa489689ffd080100ef6e9;p=mesa.git freedreno/ir3: Add workaround for VS samgq This instruction needs a workaround when used from vertex shaders. Fixes: dEQP-GLES3.functional.shaders.texture_functions.texturegradoffset.sampler2dshadow_vertex dEQP-GLES3.functional.shaders.texture_functions.texturegradoffset.sampler3d_fixed_vertex dEQP-GLES3.functional.shaders.texture_functions.texturegradoffset.sampler3d_float_vertex dEQP-GLES3.functional.shaders.texture_functions.textureprojgradoffset.sampler2dshadow_vertex dEQP-GLES3.functional.shaders.texture_functions.textureprojgradoffset.sampler3d_fixed_vertex dEQP-GLES3.functional.shaders.texture_functions.textureprojgradoffset.sampler3d_float_vertex dEQP-GLES3.functional.shaders.texture_functions.textureprojgrad.sampler2dshadow_vertex Signed-off-by: Kristian H. 
Kristensen Reviewed-by: Rob Clark --- diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 1bded7dd122..8c7410a8213 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -46,11 +46,12 @@ void * ir3_alloc(struct ir3 *shader, int sz) } struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout) + gl_shader_stage type, unsigned nin, unsigned nout) { struct ir3 *shader = rzalloc(compiler, struct ir3); shader->compiler = compiler; + shader->type = type; shader->ninputs = nin; shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 6e30f74d4ab..8f58d67fb94 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -408,6 +408,7 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr) struct ir3 { struct ir3_compiler *compiler; + gl_shader_stage type; unsigned ninputs, noutputs; struct ir3_instruction **inputs; @@ -523,7 +524,7 @@ block_id(struct ir3_block *block) } struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout); + gl_shader_stage type, unsigned nin, unsigned nout); void ir3_destroy(struct ir3 *shader); void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id); diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index ac126d5b98f..b0f2b139d5e 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -52,6 +52,10 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id compiler->gpu_id = gpu_id; compiler->set = ir3_ra_alloc_reg_set(compiler); + if (compiler->gpu_id >= 600) { + compiler->samgq_workaround = true; + } + if (compiler->gpu_id >= 400) { /* need special handling for "flat" */ compiler->flat_bypass = true; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 1bc59970c4e..181125fa3b1 100644 --- 
a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -63,6 +63,10 @@ struct ir3_compiler { * index coordinate: */ bool array_index_add_half; + + /* on a6xx, rewrite samgq to sequence of samgp0-3 in vertex shaders: + */ + bool samgq_workaround; }; struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 3b343a3055f..ad287afd308 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2438,7 +2438,7 @@ emit_instructions(struct ir3_context *ctx) */ ninputs += max_sysvals[ctx->so->type]; - ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); + ctx->ir = ir3_create(ctx->compiler, ctx->so->type, ninputs, noutputs); /* Create inputs in first block: */ ctx->block = get_block(ctx, nir_start_block(fxn)); diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index cb9a3f97292..e28cac216b5 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -41,6 +41,7 @@ struct ir3_legalize_ctx { struct ir3_compiler *compiler; + gl_shader_stage type; bool has_ssbo; bool need_pixlod; int max_bary; @@ -212,7 +213,20 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } } - list_addtail(&n->node, &block->instr_list); + if (ctx->compiler->samgq_workaround && + ctx->type == MESA_SHADER_VERTEX && n->opc == OPC_SAMGQ) { + struct ir3_instruction *samgp; + + for (i = 0; i < 4; i++) { + samgp = ir3_instr_clone(n); + samgp->opc = OPC_SAMGP0 + i; + if (i > 1) + samgp->flags |= IR3_INSTR_SY; + } + list_delinit(&n->node); + } else { + list_addtail(&n->node, &block->instr_list); + } if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); @@ -480,6 +494,7 @@ ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary) ctx->max_bary = -1; ctx->compiler = ir->compiler; + ctx->type = ir->type; /* allocate per-block data: */ 
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {