i965/gen8: Add instruction compaction tables.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_cse.cpp
index 296142b740f8ad38b8ac0d87dc1ba030271717d5..29d2e026205bbcb9b98096512d4ce2d6fdd761a1 100644 (file)
@@ -56,6 +56,8 @@ is_expression(const vec4_instruction *const inst)
    case BRW_OPCODE_SHR:
    case BRW_OPCODE_SHL:
    case BRW_OPCODE_ASR:
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
    case BRW_OPCODE_FRC:
@@ -122,24 +124,22 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
 }
 
 bool
-vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
+vec4_visitor::opt_cse_local(bblock_t *block)
 {
    bool progress = false;
+   exec_list aeb;
 
    void *cse_ctx = ralloc_context(NULL);
 
    int ip = block->start_ip;
-   for (vec4_instruction *inst = (vec4_instruction *)block->start;
-        inst != block->end->next;
-        inst = (vec4_instruction *) inst->next) {
-
+   foreach_inst_in_block (vec4_instruction, inst, block) {
       /* Skip some cases. */
       if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
-          !inst->conditional_mod)
+          (inst->dst.file != HW_REG || inst->dst.is_null()))
       {
          bool found = false;
 
-         foreach_in_list_use_after(aeb_entry, entry, aeb) {
+         foreach_in_list_use_after(aeb_entry, entry, &aeb) {
             /* Match current instruction's expression against those in AEB. */
             if (instructions_match(inst, entry->generator)) {
                found = true;
@@ -153,13 +153,13 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             aeb_entry *entry = ralloc(cse_ctx, aeb_entry);
             entry->tmp = src_reg(); /* file will be BAD_FILE */
             entry->generator = inst;
-            aeb->push_tail(entry);
+            aeb.push_tail(entry);
          } else {
             /* This is at least our second sighting of this expression.
              * If we don't have a temporary already, make one.
              */
             bool no_existing_temp = entry->tmp.file == BAD_FILE;
-            if (no_existing_temp) {
+            if (no_existing_temp && !entry->generator->dst.is_null()) {
                entry->tmp = src_reg(this, glsl_type::float_type);
                entry->tmp.type = inst->dst.type;
                entry->tmp.swizzle = BRW_SWIZZLE_XYZW;
@@ -170,10 +170,12 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             }
 
             /* dest <- temp */
-            assert(inst->dst.type == entry->tmp.type);
-            vec4_instruction *copy = MOV(inst->dst, entry->tmp);
-            copy->force_writemask_all = inst->force_writemask_all;
-            inst->insert_before(copy);
+            if (!inst->dst.is_null()) {
+               assert(inst->dst.type == entry->tmp.type);
+               vec4_instruction *copy = MOV(inst->dst, entry->tmp);
+               copy->force_writemask_all = inst->force_writemask_all;
+               inst->insert_before(copy);
+            }
 
             /* Set our iterator so that next time through the loop inst->next
              * will get the instruction in the basic block after the one we've
@@ -192,7 +194,20 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
          }
       }
 
-      foreach_in_list_safe(aeb_entry, entry, aeb) {
+      foreach_in_list_safe(aeb_entry, entry, &aeb) {
+         /* If inst just wrote the flag register, kill every AEB entry that
+          * reads the flag or that writes a different value to it.
+          */
+         if (inst->writes_flag()) {
+            if (entry->generator->reads_flag() ||
+                (entry->generator->writes_flag() &&
+                 !instructions_match(inst, entry->generator))) {
+               entry->remove();
+               ralloc_free(entry);
+               continue;
+            }
+         }
+
          for (int i = 0; i < 3; i++) {
             src_reg *src = &entry->generator->src[i];
 
@@ -226,9 +241,6 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
    ralloc_free(cse_ctx);
 
-   if (progress)
-      invalidate_live_intervals();
-
    return progress;
 }
 
@@ -239,14 +251,14 @@ vec4_visitor::opt_cse()
 
    calculate_live_intervals();
 
-   cfg_t cfg(&instructions);
-
-   for (int b = 0; b < cfg.num_blocks; b++) {
-      bblock_t *block = cfg.blocks[b];
-      exec_list aeb;
+   for (int b = 0; b < cfg->num_blocks; b++) {
+      bblock_t *block = cfg->blocks[b];
 
-      progress = opt_cse_local(block, &aeb) || progress;
+      progress = opt_cse_local(block) || progress;
    }
 
+   if (progress)
+      invalidate_live_intervals();
+
    return progress;
 }