i915g: Fix bugs in the shader optimizer.
author: Stéphane Marchesin <marcheu@chromium.org>
Wed, 11 Jan 2012 10:24:34 +0000 (02:24 -0800)
committer: Stéphane Marchesin <marcheu@chromium.org>
Wed, 11 Jan 2012 10:25:42 +0000 (02:25 -0800)
src/gallium/drivers/i915/i915_fpc_optimize.c

index d9b4c46b909c57917bab416a5f65f9eae1b75087..b09f18b01ee174d6ff5baa80265b3adf03489005 100644 (file)
@@ -66,6 +66,8 @@ static boolean has_destination(unsigned opcode)
 {
    return (opcode != TGSI_OPCODE_NOP &&
            opcode != TGSI_OPCODE_KIL &&
+           opcode != TGSI_OPCODE_KILP &&
+           opcode != TGSI_OPCODE_END &&
            opcode != TGSI_OPCODE_RET);
 }
 
@@ -130,6 +132,50 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r,
       r->Register.SwizzleW = TGSI_SWIZZLE_W;
 }
 
+static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+{
+   o->File      = i->File;
+   o->Indirect  = i->Indirect;
+   o->Dimension = i->Dimension;
+   o->Index     = i->Index;
+   o->SwizzleX  = i->SwizzleX;
+   o->SwizzleY  = i->SwizzleY;
+   o->SwizzleZ  = i->SwizzleZ;
+   o->SwizzleW  = i->SwizzleW;
+   o->Absolute  = i->Absolute;
+   o->Negate    = i->Negate;
+}
+
+static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+{
+   o->File      = i->File;
+   o->WriteMask = i->WriteMask;
+   o->Indirect  = i->Indirect;
+   o->Dimension = i->Dimension;
+   o->Index     = i->Index;
+}
+
+static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+{
+   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
+   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
+
+   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
+
+   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
+   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
+   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
+}
+
+static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+{
+   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+      memcpy(o, i, sizeof(*o));
+   else
+      copy_instruction(&o->FullInstruction, &i->FullInstruction);
+
+}
+
 /*
  * Optimize away things like:
  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
@@ -147,6 +193,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
+        !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
@@ -170,6 +217,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
+        !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
@@ -187,6 +235,30 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
    }
 }
 
+/*
+ * Optimize away things like:
+ *    MOV TEMP[0].xyz TEMP[0].xyzx
+ * into:
+ *    NOP
+ */
+static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
+{
+   union i915_full_token current;
+   copy_token(&current , tgsi_current);
+   if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
+        current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+        has_destination(current.FullInstruction.Instruction.Opcode) &&
+        current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+        current.FullInstruction.Src[0].Register.Absolute == 0 &&
+        current.FullInstruction.Src[0].Register.Negate == 0 &&
+        is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
+        same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
+   {
+      return TRUE;
+   }
+   return FALSE;
+}
+
 /*
  * Optimize away things like:
  *    *** TEMP[0], TEMP[1], TEMP[2]
@@ -194,7 +266,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
  * into:
  *    *** OUT[0], TEMP[1], TEMP[2]
  */
-static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union i915_full_token* next)
+static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
 {
    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
@@ -215,50 +287,6 @@ static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union
    }
 }
 
-static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
-{
-   o->File      = i->File;
-   o->Indirect  = i->Indirect;
-   o->Dimension = i->Dimension;
-   o->Index     = i->Index;
-   o->SwizzleX  = i->SwizzleX;
-   o->SwizzleY  = i->SwizzleY;
-   o->SwizzleZ  = i->SwizzleZ;
-   o->SwizzleW  = i->SwizzleW;
-   o->Absolute  = i->Absolute;
-   o->Negate    = i->Negate;
-}
-
-static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
-{
-   o->File      = i->File;
-   o->WriteMask = i->WriteMask;
-   o->Indirect  = i->Indirect;
-   o->Dimension = i->Dimension;
-   o->Index     = i->Index;
-}
-
-static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
-{
-   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
-   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
-
-   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
-
-   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
-   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
-   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
-}
-
-static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
-{
-   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
-      memcpy(o, i, sizeof(*o));
-   else
-      copy_instruction(&o->FullInstruction, &i->FullInstruction);
-
-}
-
 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
 {
    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
@@ -281,10 +309,16 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
    tgsi_parse_init( &parse, tokens );
    while( !tgsi_parse_end_of_tokens( &parse ) ) {
       tgsi_parse_token( &parse );
+
+      if (i915_fpc_useless_mov(&parse.FullToken)) {
+         out_tokens->NumTokens--;
+         continue;
+      }
+
       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
 
       if (i > 0) {
-         i915_fpc_optimize_useless_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+         i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
          i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
       }
       i++;