r600g: implement output modifiers and use them to further optimize LRP
author Christian König <deathsimple@vodafone.de>
Thu, 16 Dec 2010 15:42:14 +0000 (16:42 +0100)
committer Christian König <deathsimple@vodafone.de>
Wed, 12 Jan 2011 18:44:49 +0000 (19:44 +0100)
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r700_asm.c

index 3ee54a2af15a941ad9db5b36a1ebb331b4f4d4ca..5be5e1823d29aaabe2a529b6637567409db53720 100644 (file)
@@ -831,6 +831,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
                                        S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
                                        S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
                                        S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+                                       S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
                                        S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
                                        S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
                                        S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
@@ -1205,6 +1206,7 @@ void r600_bc_dump(struct r600_bc *bc)
                                fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
                                fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
                                fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
+                               fprintf(stderr, "OMOD:%d ", alu->omod);
                                fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
                                fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
                        }
index a5504ad39f465d52462f9fd12f2318ec1b50a9c0..4763ce03ec4624ce1f4829cd49a56250ba1d3933 100644 (file)
@@ -62,6 +62,7 @@ struct r600_bc_alu {
        unsigned                        bank_swizzle_force;
        u32                             value[4];
        int                             hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+       unsigned                        omod;
 };
 
 struct r600_bc_tex {
index b853fd9dc88e39a15ed8380e1f4433e11d8a3a82..78739bf89d885b725fc838ad314b0d1cb3f2fdf3 100644 (file)
@@ -2004,6 +2004,35 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
        r = tgsi_split_literal_constant(ctx, r600_src);
        if (r)
                return r;
+
+       /* optimize if it's just an equal balance */
+       if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
+               for (i = 0; i < lasti + 1; i++) {
+                       if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                               continue;
+
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+                       alu.src[0] = r600_src[1];
+                       alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+                       alu.src[1] = r600_src[2];
+                       alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+                       alu.omod = 3;
+                       r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       if (r)
+                               return r;
+
+                       alu.dst.chan = i;
+                       if (i == lasti) {
+                               alu.last = 1;
+                       }
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+               return 0;
+       }
+
        /* 1 - src0 */
        for (i = 0; i < lasti + 1; i++) {
                if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
index 892dee86baf38c023c6a278971853c728938dc45..3eb6fb50ca721671a909508e215305dd56b76638 100644 (file)
@@ -61,6 +61,7 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
                                        S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
                                        S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
                                        S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+                                       S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
                                        S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
                                        S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
                                        S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |