X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_opt_algebraic.c;h=5e7d26923de822921cf39ff567cb0a79f3cc4b36;hb=62e36ba43860fdbfb1a1058c64d0ec71734f7b0a;hp=2bf474ccef9cfe23cda8d2c0390cd48224f0fab2;hpb=b0a1e401a93b7b13870b936bc667b3fc15dba6d5;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index 2bf474ccef9..5e7d26923de 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -33,40 +33,253 @@ */ #include "vc4_qir.h" +#include "util/u_math.h" + +static bool debug; + +static void +dump_from(struct vc4_compile *c, struct qinst *inst) +{ + if (!debug) + return; + + fprintf(stderr, "optimizing: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); +} + +static void +dump_to(struct vc4_compile *c, struct qinst *inst) +{ + if (!debug) + return; + + fprintf(stderr, "to: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); +} + +static bool +is_constant_value(struct vc4_compile *c, struct qreg reg, + uint32_t val) +{ + if (reg.file == QFILE_UNIF && + !reg.pack && + c->uniform_contents[reg.index] == QUNIFORM_CONSTANT && + c->uniform_data[reg.index] == val) { + return true; + } + + if (reg.file == QFILE_SMALL_IMM && reg.index == val) + return true; + + return false; +} + +static bool +is_zero(struct vc4_compile *c, struct qreg reg) +{ + reg = qir_follow_movs(c, reg); + return is_constant_value(c, reg, 0); +} + +static bool +is_1f(struct vc4_compile *c, struct qreg reg) +{ + reg = qir_follow_movs(c, reg); + return is_constant_value(c, reg, fui(1.0)); +} + +static void +replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg) +{ + dump_from(c, inst); + + inst->src[0] = arg; + if (qir_has_implicit_tex_uniform(inst)) + inst->src[1] = inst->src[qir_get_tex_uniform_src(inst)]; + + if (qir_is_mul(inst)) + inst->op = QOP_MMOV; + else if (qir_is_float_input(inst)) + inst->op = QOP_FMOV; + else + inst->op = QOP_MOV; + dump_to(c, inst); +} + +static bool +replace_x_0_with_x(struct vc4_compile *c, + struct qinst *inst, + int arg) +{ + if (!is_zero(c, inst->src[arg])) + return false; + replace_with_mov(c, inst, inst->src[1 - arg]); + return true; +} + +static bool +replace_x_0_with_0(struct vc4_compile *c, + struct qinst *inst, + int arg) +{ + if (!is_zero(c, inst->src[arg])) + return false; + replace_with_mov(c, inst, inst->src[arg]); + return true; +} + +static bool +fmul_replace_one(struct vc4_compile *c, + struct qinst *inst, + int arg) +{ + if (!is_1f(c, inst->src[arg])) + return false; + replace_with_mov(c, inst, inst->src[1 - arg]); + return true; +} bool -qir_opt_algebraic(struct qcompile *c) +qir_opt_algebraic(struct vc4_compile *c) { bool progress = false; - struct simple_node *node; - bool debug = false; - - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + qir_for_each_inst_inorder(inst, c) { switch (inst->op) { - case QOP_CMP: - /* Turn "dst = (a < 0) ? b : b)" into "dst = b" */ - if (qir_reg_equals(inst->src[1], inst->src[2])) { - if (debug) { - fprintf(stderr, "optimizing: "); - qir_dump_inst(inst); - fprintf(stderr, "\n"); - } + case QOP_FMIN: + if (is_1f(c, inst->src[1]) && + inst->src[0].pack >= QPU_UNPACK_8D_REP && + inst->src[0].pack <= QPU_UNPACK_8D) { + replace_with_mov(c, inst, inst->src[0]); + progress = true; + } + break; - inst->op = QOP_MOV; - inst->src[0] = inst->src[1]; - inst->src[1] = c->undef; + case QOP_FMAX: + if (is_zero(c, inst->src[1]) && + inst->src[0].pack >= QPU_UNPACK_8D_REP && + inst->src[0].pack <= QPU_UNPACK_8D) { + replace_with_mov(c, inst, inst->src[0]); progress = true; + } + break; + + case QOP_FSUB: + case QOP_SUB: + if (is_zero(c, inst->src[1])) { + replace_with_mov(c, inst, inst->src[0]); + progress = true; + } + break; + + case QOP_ADD: + /* Kernel validation requires that we use an actual + * add instruction. + */ + if (inst->dst.file != QFILE_TEX_S_DIRECT && + (replace_x_0_with_x(c, inst, 0) || + replace_x_0_with_x(c, inst, 1))) { + progress = true; + break; + } + break; + + case QOP_FADD: + if (replace_x_0_with_x(c, inst, 0) || + replace_x_0_with_x(c, inst, 1)) { + progress = true; + break; + } + + /* FADD(a, FSUB(0, b)) -> FSUB(a, b) */ + if (inst->src[1].file == QFILE_TEMP && + c->defs[inst->src[1].index] && + c->defs[inst->src[1].index]->op == QOP_FSUB) { + struct qinst *fsub = c->defs[inst->src[1].index]; + if (is_zero(c, fsub->src[0])) { + dump_from(c, inst); + inst->op = QOP_FSUB; + inst->src[1] = fsub->src[1]; + progress = true; + dump_to(c, inst); + break; + } + } - if (debug) { - fprintf(stderr, "to: "); - qir_dump_inst(inst); - fprintf(stderr, "\n"); + /* FADD(FSUB(0, b), a) -> FSUB(a, b) */ + if (inst->src[0].file == QFILE_TEMP && + c->defs[inst->src[0].index] && + c->defs[inst->src[0].index]->op == QOP_FSUB) { + struct qinst *fsub = c->defs[inst->src[0].index]; + if (is_zero(c, fsub->src[0])) { + dump_from(c, inst); + inst->op = QOP_FSUB; + inst->src[0] = inst->src[1]; + inst->src[1] = fsub->src[1]; + dump_to(c, inst); + progress = true; + break; } } break; + case QOP_FMUL: + if (!inst->dst.pack && + (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1) || + fmul_replace_one(c, inst, 0) || + fmul_replace_one(c, inst, 1))) { + progress = true; + break; + } + break; + + case QOP_MUL24: + if (!inst->dst.pack && + (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1))) { + progress = true; + break; + } + break; + + case QOP_AND: + if (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1)) { + progress = true; + break; + } + + if (is_constant_value(c, inst->src[0], ~0)) { + replace_with_mov(c, inst, inst->src[1]); + progress = true; + break; + } + if (is_constant_value(c, inst->src[1], ~0)) { + replace_with_mov(c, inst, inst->src[0]); + progress = true; + break; + } + break; + + case QOP_OR: + if (replace_x_0_with_x(c, inst, 0) || + replace_x_0_with_x(c, inst, 1)) { + progress = true; + break; + } + break; + + case QOP_RCP: + if (is_1f(c, inst->src[0])) { + replace_with_mov(c, inst, inst->src[0]); + progress = true; + break; + } + break; + default: break; }