From 956d3194464a83eaf1bb8fb64aeac897edf2943b Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Thu, 10 Oct 2019 14:43:03 -0700 Subject: [PATCH] freedreno/ir3: Extend RA with mechanism for pre-coloring registers We'll need to pre-color certain input registers between VS and GS shaders. Signed-off-by: Kristian H. Kristensen --- src/freedreno/ir3/ir3.h | 2 +- src/freedreno/ir3/ir3_compiler_nir.c | 30 ++++++++++- src/freedreno/ir3/ir3_ra.c | 78 +++++++++++----------------- 3 files changed, 60 insertions(+), 50 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 224ed4e5c66..cf53fa087d3 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1081,7 +1081,7 @@ void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so); /* register assignment: */ struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); -int ir3_ra(struct ir3_shader_variant *v); +int ir3_ra(struct ir3_shader_variant *v, struct ir3_instruction **precolor, unsigned nprecolor); /* legalize: */ void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index aa9eaca7bdd..118e7781c6a 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2996,7 +2996,35 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_print(ir); } - ret = ir3_ra(so); + /* Pre-assign VS inputs on a6xx+ binning pass shader, to align + * with draw pass VS, so binning and draw pass can both use the + * same VBO state. + * + * Note that VS inputs are expected to be full precision. 
+ */ + bool pre_assign_inputs = (ir->compiler->gpu_id >= 600) && + (ir->type == MESA_SHADER_VERTEX) && + so->binning_pass; + + if (pre_assign_inputs) { + for (unsigned i = 0; i < ir->ninputs; i++) { + struct ir3_instruction *instr = ir->inputs[i]; + + if (!instr) + continue; + + unsigned n = i / 4; + unsigned c = i % 4; + unsigned regid = so->nonbinning->inputs[n].regid + c; + + instr->regs[0]->num = regid; + } + + ret = ir3_ra(so, ir->inputs, ir->ninputs); + } else { + ret = ir3_ra(so, NULL, 0); + } + if (ret) { DBG("RA failed!"); goto out; diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index a641661a441..39d34bb51d9 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1090,38 +1090,20 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) } static int -ra_alloc(struct ir3_ra_ctx *ctx) +ra_alloc(struct ir3_ra_ctx *ctx, struct ir3_instruction **precolor, unsigned nprecolor) { - /* Pre-assign VS inputs on a6xx+ binning pass shader, to align - * with draw pass VS, so binning and draw pass can both use the - * same VBO state. - * - * Note that VS inputs are expected to be full precision. 
- */ - bool pre_assign_inputs = (ctx->ir->compiler->gpu_id >= 600) && - (ctx->ir->type == MESA_SHADER_VERTEX) && - ctx->v->binning_pass; - - if (pre_assign_inputs) { - for (unsigned i = 0; i < ctx->ir->ninputs; i++) { - struct ir3_instruction *instr = ctx->ir->inputs[i]; - - if (!instr) - continue; + unsigned num_precolor = 0; + for (unsigned i = 0; i < nprecolor; i++) { + if (precolor[i] && !(precolor[i]->flags & IR3_INSTR_UNUSED)) { + struct ir3_instruction *instr = precolor[i]; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH))); - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - /* only consider the first component: */ if (id->off > 0) continue; - unsigned name = ra_name(ctx, id); - - unsigned n = i / 4; - unsigned c = i % 4; - /* 'base' is in scalar (class 0) but we need to map that * the conflicting register of the appropriate class (ie. * input could be vec2/vec3/etc) @@ -1139,10 +1121,11 @@ ra_alloc(struct ir3_ra_ctx *ctx) * R3 | D2 * .. and so on.. 
*/ - unsigned reg = ctx->set->gpr_to_ra_reg[id->cls] - [ctx->v->nonbinning->inputs[n].regid + c]; - + unsigned regid = instr->regs[0]->num; + unsigned reg = ctx->set->gpr_to_ra_reg[id->cls][regid]; + unsigned name = ra_name(ctx, id); ra_set_node_reg(ctx->g, name, reg); + num_precolor = MAX2(regid, num_precolor); } } @@ -1174,31 +1157,30 @@ retry: } /* also need to not conflict with any pre-assigned inputs: */ - if (pre_assign_inputs) { - for (unsigned i = 0; i < ctx->ir->ninputs; i++) { - struct ir3_instruction *instr = ctx->ir->inputs[i]; + for (unsigned i = 0; i < nprecolor; i++) { + struct ir3_instruction *instr = precolor[i]; - if (!instr) - continue; + if (!instr) + continue; - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - /* only consider the first component: */ - if (id->off > 0) - continue; + /* only consider the first component: */ + if (id->off > 0) + continue; - unsigned name = ra_name(ctx, id); + unsigned name = ra_name(ctx, id); + unsigned regid = instr->regs[0]->num; - /* Check if array intersects with liverange AND register - * range of the input: - */ - if (intersects(arr->start_ip, arr->end_ip, - ctx->def[name], ctx->use[name]) && + /* Check if array intersects with liverange AND register + * range of the input: + */ + if (intersects(arr->start_ip, arr->end_ip, + ctx->def[name], ctx->use[name]) && intersects(base, base + arr->length, - i, i + class_sizes[id->cls])) { - base = MAX2(base, i + class_sizes[id->cls]); - goto retry; - } + regid, regid + class_sizes[id->cls])) { + base = MAX2(base, regid + class_sizes[id->cls]); + goto retry; } } @@ -1224,7 +1206,7 @@ retry: return 0; } -int ir3_ra(struct ir3_shader_variant *v) +int ir3_ra(struct ir3_shader_variant *v, struct ir3_instruction **precolor, unsigned nprecolor) { struct ir3_ra_ctx ctx = { .v = v, @@ -1235,7 +1217,7 @@ int ir3_ra(struct ir3_shader_variant *v) ra_init(&ctx); ra_add_interference(&ctx); - ret = ra_alloc(&ctx); + 
ret = ra_alloc(&ctx, precolor, nprecolor); ra_destroy(&ctx); return ret; -- 2.30.2