freedreno/a3xx/compiler: collapse nop's with repeat
author: Rob Clark <robclark@freedesktop.org>
Fri, 21 Feb 2014 23:03:30 +0000 (18:03 -0500)
committer: Rob Clark <robclark@freedesktop.org>
Sun, 23 Feb 2014 19:58:23 +0000 (14:58 -0500)
Collapsing consecutive nops into a single nop with a repeat count is
easier than making more extensive use of rpt, and the more compact
shaders seem to bring a small performance boost.  (Perhaps the benefits
of the repeat flag go beyond the instruction cache; possibly it saves
on instruction decode as well?)

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/ir3.h
src/gallium/drivers/freedreno/a3xx/ir3_ra.c

index 9c57a65355384cac37058ab141b1f38c279905bf..894db175076a5275f2fea4abca64b9157a158667 100644 (file)
@@ -312,6 +312,11 @@ static inline bool is_flow(struct ir3_instruction *instr)
        return (instr->category == 0);
 }
 
+static inline bool is_nop(struct ir3_instruction *instr)
+{
+       return is_flow(instr) && (instr->opc == OPC_NOP);
+}
+
 static inline bool is_alu(struct ir3_instruction *instr)
 {
        return (1 <= instr->category) && (instr->category <= 3);
index 5df57e776f90d95aa3f1a79de821226afefa8a4c..06a86ff3b2d9dfc4ecc055d1ec1511d5df2daa7c 100644 (file)
@@ -542,6 +542,16 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                if ((shader->instrs_count == 0) && (n->category >= 5))
                        ir3_instr_create(block, 0, OPC_NOP);
 
+               if (is_nop(n) && shader->instrs_count) {
+                       struct ir3_instruction *last =
+                                       shader->instrs[shader->instrs_count-1];
+                       if (is_nop(last) && (last->repeat < 5)) {
+                               last->repeat++;
+                               last->flags |= n->flags;
+                               continue;
+                       }
+               }
+
                shader->instrs[shader->instrs_count++] = n;
 
                if (is_sfu(n))