X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fir3%2Fir3_legalize.c;h=69efdbabc3f1828854d0b7260c447a4c8b944675;hb=2e1df6a17ff82c4a456caa8be4bfae1fac009b6a;hp=3dc6a6c7371f009c40d67a9aabf42e0d0d07ebdb;hpb=92d6eb4dd539f48355ebfe5408f6a18c5a4c3efd;p=mesa.git diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 3dc6a6c7371..69efdbabc3f 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -28,15 +28,21 @@ #include "util/u_math.h" #include "ir3.h" -#include "ir3_compiler.h" +#include "ir3_shader.h" /* * Legalize: * - * We currently require that scheduling ensures that we have enough nop's - * in all the right places. The legalize step mostly handles fixing up - * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's - * into fewer nop's w/ rpt flag. + * The legalize pass handles ensuring sufficient nop's and sync flags for + * correct execution. + * + * 1) Iteratively determine where sync ((sy)/(ss)) flags are needed, + * based on state flowing out of predecessor blocks until there is + * no further change. In some cases this requires inserting nops. + * 2) Mark (ei) on last varying input, and (ul) on last use of a0.x + * 3) Final nop scheduling for instruction latency + * 4) Resolve jumps and schedule blocks, marking potential convergence + * points with (jp) */ struct ir3_legalize_ctx { @@ -88,6 +94,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct ir3_legalize_state *state = &bd->state; bool last_input_needs_ss = false; bool has_tex_prefetch = false; + bool mergedregs = ctx->so->mergedregs; /* our input state is the OR of all predecessor blocks' state: */ set_foreach(block->predecessors, entry) { @@ -132,15 +139,15 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (last_n && is_barrier(last_n)) { n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; last_input_needs_ss = false; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); - regmask_init(&state->needs_sy); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); + regmask_init(&state->needs_sy, mergedregs); } if (last_n && (last_n->opc == OPC_PREDT)) { n->flags |= IR3_INSTR_SS; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); } /* NOTE: consider dst register too.. it could happen that @@ -161,13 +168,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (regmask_get(&state->needs_ss, reg)) { n->flags |= IR3_INSTR_SS; last_input_needs_ss = false; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); } if (regmask_get(&state->needs_sy, reg)) { n->flags |= IR3_INSTR_SY; - regmask_init(&state->needs_sy); + regmask_init(&state->needs_sy, mergedregs); } } @@ -184,8 +191,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (regmask_get(&state->needs_ss_war, reg)) { n->flags |= IR3_INSTR_SS; last_input_needs_ss = false; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); } if (last_rel && (reg->num == regid(REG_A0, 0))) { @@ -226,13 +233,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) list_addtail(&n->node, &block->instr_list); } - if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) { - struct ir3_instruction *op_p = ir3_instr_clone(n); - op_p->flags = IR3_INSTR_P; - - ctx->so->need_fine_derivatives = true; - } - if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); @@ -353,6 +353,42 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) return true; } +/* Expands dsxpp and dsypp macros to: + * + * dsxpp.1 dst, src + * dsxpp.1.p dst, src + * + * We apply this after flags syncing, as we don't want to sync in between the + * two (which might happen if dst == src). We do it before nop scheduling + * because that needs to count actual instructions. + */ +static bool +apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block) +{ + struct list_head instr_list; + + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); + + foreach_instr_safe (n, &instr_list) { + list_addtail(&n->node, &block->instr_list); + + if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) { + n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1; + + struct ir3_instruction *op_p = ir3_instr_clone(n); + op_p->flags = IR3_INSTR_P; + + ctx->so->need_fine_derivatives = true; + } + } + + return true; +} + /* NOTE: branch instructions are always the last instruction(s) * in the block. We take advantage of this as we resolve the * branches, since "if (foo) break;" constructs turn into @@ -710,6 +746,7 @@ bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) { struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); + bool mergedregs = so->mergedregs; bool progress; ctx->so = so; @@ -719,7 +756,14 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) /* allocate per-block data: */ foreach_block (block, &ir->block_list) { - block->data = rzalloc(ctx, struct ir3_legalize_block_data); + struct ir3_legalize_block_data *bd = + rzalloc(ctx, struct ir3_legalize_block_data); + + regmask_init(&bd->state.needs_ss_war, mergedregs); + regmask_init(&bd->state.needs_ss, mergedregs); + regmask_init(&bd->state.needs_sy, mergedregs); + + block->data = bd; } ir3_remove_nops(ir); @@ -737,6 +781,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) block_sched(ir); if (so->type == MESA_SHADER_FRAGMENT) kill_sched(ir, so); + + foreach_block (block, &ir->block_list) { + progress |= apply_fine_deriv_macro(ctx, block); + } + nop_sched(ir); do {