X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_combine_constants.cpp;h=5bd5343b8b4436adf8a20569d92c0466387c4d4b;hb=55364ab5b7136e09a61d858f1167dee81e17bd9f;hp=ebde8df2ff1696782c84a057ccd7d938b662d373;hpb=b0d422cd2a99d2fd26ab11880d5d8410ebfc64b2;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index ebde8df2ff1..5bd5343b8b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -35,16 +35,19 @@ */ #include "brw_fs.h" -#include "brw_fs_live_variables.h" #include "brw_cfg.h" +using namespace brw; + +static const bool debug = false; + /* Returns whether an instruction could co-issue if its immediate source were * replaced with a GRF source. */ static bool -could_coissue(const struct brw_context *brw, const fs_inst *inst) +could_coissue(const struct brw_device_info *devinfo, const fs_inst *inst) { - if (brw->gen != 7) + if (devinfo->gen != 7) return false; switch (inst->opcode) { @@ -62,9 +65,11 @@ could_coissue(const struct brw_context *brw, const fs_inst *inst) * Returns true for instructions that don't support immediate sources. */ static bool -must_promote_imm(const fs_inst *inst) +must_promote_imm(const struct brw_device_info *devinfo, const fs_inst *inst) { switch (inst->opcode) { + case SHADER_OPCODE_POW: + return devinfo->gen < 8; case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: return true; @@ -117,7 +122,7 @@ struct imm { * constant value. */ uint8_t subreg_offset; - uint16_t reg; + uint16_t nr; /** The number of coissuable instructions using this immediate. */ uint16_t uses_by_coissue; @@ -142,8 +147,6 @@ struct table { static struct imm * find_imm(struct table *table, float val) { - assert(signbit(val) == 0); - for (int i = 0; i < table->len; i++) { if (table->imm[i].val == val) { return &table->imm[i]; @@ -207,7 +210,7 @@ fs_visitor::opt_combine_constants() foreach_block_and_inst(block, fs_inst, inst, cfg) { ip++; - if (!could_coissue(brw, inst) && !must_promote_imm(inst)) + if (!could_coissue(devinfo, inst) && !must_promote_imm(devinfo, inst)) continue; for (int i = 0; i < inst->sources; i++) { @@ -215,7 +218,8 @@ fs_visitor::opt_combine_constants() inst->src[i].type != BRW_REGISTER_TYPE_F) continue; - float val = fabsf(inst->src[i].fixed_hw_reg.dw1.f); + float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f : + fabs(inst->src[i].f); struct imm *imm = find_imm(&table, val); if (imm) { @@ -224,8 +228,8 @@ fs_visitor::opt_combine_constants() imm->inst = NULL; imm->block = intersection; imm->uses->push_tail(link(const_ctx, &inst->src[i])); - imm->uses_by_coissue += could_coissue(brw, inst); - imm->must_promote = imm->must_promote || must_promote_imm(inst); + imm->uses_by_coissue += could_coissue(devinfo, inst); + imm->must_promote = imm->must_promote || must_promote_imm(devinfo, inst); imm->last_use_ip = ip; } else { imm = new_imm(&table, const_ctx); @@ -234,8 +238,8 @@ fs_visitor::opt_combine_constants() imm->uses = new(const_ctx) exec_list(); imm->uses->push_tail(link(const_ctx, &inst->src[i])); imm->val = val; - imm->uses_by_coissue = could_coissue(brw, inst); - imm->must_promote = must_promote_imm(inst); + imm->uses_by_coissue = could_coissue(devinfo, inst); + imm->must_promote = must_promote_imm(devinfo, inst); imm->first_use_ip = ip; imm->last_use_ip = ip; } @@ -262,27 +266,25 @@ fs_visitor::opt_combine_constants() if (cfg->num_blocks != 1) qsort(table.imm, table.len, sizeof(struct imm), compare); - /* Insert MOVs to load the constant values into GRFs. */ - fs_reg reg(GRF, alloc.allocate(dispatch_width / 8)); + fs_reg reg(VGRF, alloc.allocate(1)); reg.stride = 0; for (int i = 0; i < table.len; i++) { struct imm *imm = &table.imm[i]; - - fs_inst *mov = MOV(reg, fs_reg(imm->val)); - mov->force_writemask_all = true; - if (imm->inst) { - imm->inst->insert_before(imm->block, mov); - } else { - backend_instruction *inst = imm->block->last_non_control_flow_inst(); - inst->insert_after(imm->block, mov); - } - imm->reg = reg.reg; + /* Insert it either before the instruction that generated the immediate + * or after the last non-control flow instruction of the common ancestor. + */ + exec_node *n = (imm->inst ? imm->inst : + imm->block->last_non_control_flow_inst()->next); + const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0); + + ibld.MOV(reg, brw_imm_f(imm->val)); + imm->nr = reg.nr; imm->subreg_offset = reg.subreg_offset; reg.subreg_offset += sizeof(float); - if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) { - reg.reg = alloc.allocate(dispatch_width / 8); + if ((unsigned)reg.subreg_offset == 8 * sizeof(float)) { + reg.nr = alloc.allocate(1); reg.subreg_offset = 0; } } @@ -292,13 +294,31 @@ fs_visitor::opt_combine_constants() for (int i = 0; i < table.len; i++) { foreach_list_typed(reg_link, link, link, table.imm[i].uses) { fs_reg *reg = link->reg; - reg->file = GRF; - reg->reg = table.imm[i].reg; + reg->file = VGRF; + reg->nr = table.imm[i].nr; reg->subreg_offset = table.imm[i].subreg_offset; reg->stride = 0; - reg->negate = signbit(reg->fixed_hw_reg.dw1.f) != - signbit(table.imm[i].val); - assert(fabsf(reg->fixed_hw_reg.dw1.f) == table.imm[i].val); + reg->negate = signbit(reg->f) != signbit(table.imm[i].val); + assert((isnan(reg->f) && isnan(table.imm[i].val)) || + fabsf(reg->f) == fabs(table.imm[i].val)); + } + } + + if (debug) { + for (int i = 0; i < table.len; i++) { + struct imm *imm = &table.imm[i]; + + printf("%.3fF - block %3d, reg %3d sub %2d, Uses: (%2d, %2d), " + "IP: %4d to %4d, length %4d\n", + imm->val, + imm->block->num, + imm->nr, + imm->subreg_offset, + imm->must_promote, + imm->uses_by_coissue, + imm->first_use_ip, + imm->last_use_ip, + imm->last_use_ip - imm->first_use_ip); } }