From: Jonathan Marek Date: Wed, 19 Dec 2018 01:23:16 +0000 (-0500) Subject: freedreno: a2xx: add ir2 copy propagation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9f614c74b7f56bbc6b5963d7fafa289a642f8785;p=mesa.git freedreno: a2xx: add ir2 copy propagation Two cases: * replacing srcs which refer to MOV instructions * replacing MOVs used to write to exports Signed-off-by: Jonathan Marek --- diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 119b3147c5c..e576739b8ab 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -68,6 +68,7 @@ a2xx_SOURCES := \ a2xx/ir2.c \ a2xx/ir2.h \ a2xx/ir2_assemble.c \ + a2xx/ir2_cp.c \ a2xx/ir2_nir.c \ a2xx/ir2_private.h \ a2xx/ir2_ra.c diff --git a/src/gallium/drivers/freedreno/a2xx/ir2.c b/src/gallium/drivers/freedreno/a2xx/ir2.c index 4d00dd5db26..8995d7e8aaf 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2.c @@ -425,9 +425,15 @@ ir2_compile(struct fd2_shader_stateobj *so, unsigned variant, /* convert nir to internal representation */ ir2_nir_compile(&ctx, binning); + /* copy propagate srcs */ + cp_src(&ctx); + /* get ref_counts and kill non-needed instructions */ ra_count_refs(&ctx); + /* remove movs used to write outputs */ + cp_export(&ctx); + /* instruction order.. 
and vector->scalar conversions */ schedule_instrs(&ctx); diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_cp.c b/src/gallium/drivers/freedreno/a2xx/ir2_cp.c new file mode 100644 index 00000000000..fa155887f80 --- /dev/null +++ b/src/gallium/drivers/freedreno/a2xx/ir2_cp.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static bool is_mov(struct ir2_instr *instr) +{ + return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv && + instr->src_count == 1; +} + +static void src_combine(struct ir2_src *src, struct ir2_src b) +{ + src->num = b.num; + src->type = b.type; + src->swizzle = swiz_merge(b.swizzle, src->swizzle); + if (!src->abs) /* if we have abs we don't care about previous negate */ + src->negate ^= b.negate; + src->abs |= b.abs; +} + +/* cp_src: replace src regs when they refer to a mov instruction + * example: + * ALU: MAXv R7 = C7, C7 + * ALU: MULADDv R7 = R7, R10, R0.xxxx + * becomes: + * ALU: MULADDv R7 = C7, R10, R0.xxxx + */ +void cp_src(struct ir2_context *ctx) +{ + struct ir2_instr *p; + + ir2_foreach_instr(instr, ctx) { + ir2_foreach_src(src, instr) { + /* loop to replace recursively */ + do { + if (src->type != IR2_SRC_SSA) + break; + + p = &ctx->instr[src->num]; + /* don't work across blocks to avoid possible issues */ + if (p->block_idx != instr->block_idx) + break; + + if (!is_mov(p)) + break; + + /* cant apply abs to const src, const src only for alu */ + if (p->src[0].type == IR2_SRC_CONST && + (src->abs || instr->type != IR2_ALU)) + break; + + src_combine(src, p->src[0]); + } while (1); + } + } +} + +/* cp_export: replace mov to export when possible + * in the cp_src pass we bypass any mov instructions related + * to the src registers, but for exports we need something different + * example: + * ALU: MAXv R3.x___ = C9.x???, C9.x??? + * ALU: MAXv R3._y__ = R0.?x??, C8.?x?? + * ALU: MAXv export0 = R3.yyyx, R3.yyyx + * becomes: + * ALU: MAXv export0.___w = C9.???x, C9.???x + * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx? 
+ * + */ +void cp_export(struct ir2_context *ctx) +{ + struct ir2_instr *c[4], *ins[4]; + struct ir2_src *src; + struct ir2_reg *reg; + unsigned ncomp; + + ir2_foreach_instr(instr, ctx) { + if (!is_export(instr)) /* TODO */ + continue; + + if (!is_mov(instr)) + continue; + + src = &instr->src[0]; + + if (src->negate || src->abs) /* TODO handle these cases */ + continue; + + if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + ncomp = dst_ncomp(instr); + + unsigned reswiz[4] = {}; + unsigned num_instr = 0; + + /* fill array c with pointers to instrs that write each component */ + if (src->type == IR2_SRC_SSA) { + struct ir2_instr *instr = &ctx->instr[src->num]; + + if (instr->type != IR2_ALU) + continue; + + for (int i = 0; i < ncomp; i++) + c[i] = instr; + + ins[num_instr++] = instr; + reswiz[0] = src->swizzle; + } else { + bool ok = true; + unsigned write_mask = 0; + + ir2_foreach_instr(instr, ctx) { + if (instr->is_ssa || instr->reg != reg) + continue; + + /* set by non-ALU */ + if (instr->type != IR2_ALU) { + ok = false; + break; + } + + /* component written more than once */ + if (write_mask & instr->alu.write_mask) { + ok = false; + break; + } + + write_mask |= instr->alu.write_mask; + + /* src pointers for components */ + for (int i = 0, j = 0; i < 4; i++) { + unsigned k = swiz_get(src->swizzle, i); + if (instr->alu.write_mask & 1 << k) { + c[i] = instr; + + /* reswiz = compressed src->swizzle */ + unsigned x = 0; + for (int i = 0; i < k; i++) + x += !!(instr->alu.write_mask & 1 << i); + + assert(src->swizzle || x == j); + reswiz[num_instr] |= swiz_set(x, j++); + } + } + ins[num_instr++] = instr; + } + if (!ok) + continue; + } + + bool redirect = true; + + /* must all be in same block */ + for (int i = 0; i < ncomp; i++) + redirect &= (c[i]->block_idx == instr->block_idx); + + /* no other instr using the value */ + ir2_foreach_instr(p, ctx) { + if (p == instr) + continue; + ir2_foreach_src(src, p) + 
redirect &= reg != get_reg_src(ctx, src); + } + + if (!redirect) + continue; + + /* redirect the instructions writing to the register */ + for (int i = 0; i < num_instr; i++) { + struct ir2_instr *p = ins[i]; + + p->alu.export = instr->alu.export; + p->alu.write_mask = 0; + p->is_ssa = true; + p->ssa.ncomp = 0; + memset(p->ssa.comp, 0, sizeof(p->ssa.comp)); + + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: + case DOT2ADDv: + case DOT3v: + case DOT4v: + case CUBEv: + continue; + default: + break; + } + ir2_foreach_src(s, p) + swiz_merge_p(&s->swizzle, reswiz[i]); + } + + for (int i = 0; i < ncomp; i++) { + c[i]->alu.write_mask |= (1 << i); + c[i]->ssa.ncomp++; + } + instr->type = IR2_NONE; + instr->need_emit = false; + } +} diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_private.h b/src/gallium/drivers/freedreno/a2xx/ir2_private.h index d1fbacd908f..3bb3fe483de 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_private.h +++ b/src/gallium/drivers/freedreno/a2xx/ir2_private.h @@ -195,6 +195,9 @@ void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr); void ra_block_free(struct ir2_context *ctx, unsigned block); +void cp_src(struct ir2_context *ctx); +void cp_export(struct ir2_context *ctx); + /* utils */ enum { IR2_SWIZZLE_Y = 1 << 0, diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 40b55ad491d..df1e617f60b 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -86,6 +86,7 @@ files_libfreedreno = files( 'a2xx/ir2.c', 'a2xx/ir2.h', 'a2xx/ir2_assemble.c', + 'a2xx/ir2_cp.c', 'a2xx/ir2_nir.c', 'a2xx/ir2_private.h', 'a2xx/ir2_ra.c',