i965/vec4: Perform CSE on CMP(N) instructions.
author: Matt Turner <mattst88@gmail.com>
Wed, 11 Jun 2014 20:49:34 +0000 (13:49 -0700)
committer: Matt Turner <mattst88@gmail.com>
Mon, 7 Jul 2014 01:19:15 +0000 (18:19 -0700)
Port of commit b16b3c87 to the vec4 code.

No shader-db improvements, but might as well. The fs backend saw an
improvement because it's scalar and multiple identical CMP instructions
were generated by the SEL peepholes.

src/mesa/drivers/dri/i965/brw_vec4_cse.cpp

index f3a69ed9fb6b67ac9d65a502f72465a2abefd1a5..eeaa743e15f7807065905ca2dddddf3e3143d510 100644 (file)
@@ -56,6 +56,8 @@ is_expression(const vec4_instruction *const inst)
    case BRW_OPCODE_SHR:
    case BRW_OPCODE_SHL:
    case BRW_OPCODE_ASR:
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
    case BRW_OPCODE_FRC:
@@ -135,7 +137,7 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
       /* Skip some cases. */
       if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
-          !inst->conditional_mod)
+          (inst->dst.file != HW_REG || inst->dst.is_null()))
       {
          bool found = false;
 
@@ -195,6 +197,19 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
       }
 
       foreach_in_list_safe(aeb_entry, entry, aeb) {
+         /* Kill all AEB entries that write a different value to or read from
+          * the flag register if we just wrote it.
+          */
+         if (inst->writes_flag()) {
+            if (entry->generator->reads_flag() ||
+                (entry->generator->writes_flag() &&
+                 !instructions_match(inst, entry->generator))) {
+               entry->remove();
+               ralloc_free(entry);
+               continue;
+            }
+         }
+
          for (int i = 0; i < 3; i++) {
             src_reg *src = &entry->generator->src[i];