i965/fs: Lower 32x32 bit multiplication on BXT.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_cse.cpp
index 3ddd17c7e3b383aadb262aa2fce41d54def12767..c7628dcc2f456628d3b941996d1f04f06979b403 100644 (file)
@@ -32,6 +32,8 @@
  * 13.1 (p378).
  */
 
+using namespace brw;
+
 namespace {
 struct aeb_entry : public exec_node {
    /** The instruction that generates the expression value. */
@@ -59,6 +61,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
    case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
+   case SHADER_OPCODE_MULH:
    case BRW_OPCODE_FRC:
    case BRW_OPCODE_RNDU:
    case BRW_OPCODE_RNDD:
@@ -173,11 +176,11 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
           operands_match(a, b, negate);
 }
 
-static fs_inst *
-create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
+static void
+create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
 {
    int written = inst->regs_written;
-   int dst_width = inst->dst.width / 8;
+   int dst_width = inst->exec_size / 8;
    fs_inst *copy;
 
    if (written > dst_width) {
@@ -193,25 +196,21 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
       }
 
       assert(src.file == GRF);
-      payload = ralloc_array(v->mem_ctx, fs_reg, sources);
+      payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
       for (int i = 0; i < header_size; i++) {
          payload[i] = src;
-         payload[i].width = 8;
          src.reg_offset++;
       }
       for (int i = header_size; i < sources; i++) {
          payload[i] = src;
-         src = offset(src, 1);
+         src = offset(src, bld, 1);
       }
-      copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
+      copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
    } else {
-      copy = v->MOV(inst->dst, src);
-      copy->force_writemask_all = inst->force_writemask_all;
+      copy = bld.MOV(inst->dst, src);
       copy->src[0].negate = negate;
    }
    assert(copy->regs_written == written);
-
-   return copy;
 }
 
 bool
@@ -258,16 +257,14 @@ fs_visitor::opt_cse_local(bblock_t *block)
              */
             bool no_existing_temp = entry->tmp.file == BAD_FILE;
             if (no_existing_temp && !entry->generator->dst.is_null()) {
+               const fs_builder ibld = fs_builder(this, block, entry->generator)
+                                       .at(block, entry->generator->next);
                int written = entry->generator->regs_written;
-               assert((written * 8) % entry->generator->dst.width == 0);
 
                entry->tmp = fs_reg(GRF, alloc.allocate(written),
-                                   entry->generator->dst.type,
-                                   entry->generator->dst.width);
+                                   entry->generator->dst.type);
 
-               fs_inst *copy = create_copy_instr(this, entry->generator,
-                                                 entry->tmp, false);
-               entry->generator->insert_after(block, copy);
+               create_copy_instr(ibld, entry->generator, entry->tmp, false);
 
                entry->generator->dst = entry->tmp;
             }
@@ -275,12 +272,10 @@ fs_visitor::opt_cse_local(bblock_t *block)
             /* dest <- temp */
             if (!inst->dst.is_null()) {
                assert(inst->regs_written == entry->generator->regs_written);
-               assert(inst->dst.width == entry->generator->dst.width);
                assert(inst->dst.type == entry->tmp.type);
+               const fs_builder ibld(this, block, inst);
 
-               fs_inst *copy = create_copy_instr(this, inst,
-                                                 entry->tmp, negate);
-               inst->insert_before(block, copy);
+               create_copy_instr(ibld, inst, entry->tmp, negate);
             }
 
             /* Set our iterator so that next time through the loop inst->next