r300/compiler: Use ALU Result for IF conditionals
author Tom Stellard <tstellar@gmail.com>
Sun, 15 May 2011 04:47:26 +0000 (21:47 -0700)
committer Tom Stellard <tstellar@gmail.com>
Sun, 15 May 2011 05:35:28 +0000 (22:35 -0700)
This saves one instruction per IF.

src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
src/mesa/drivers/dri/r300/compiler/radeon_variable.c
src/mesa/drivers/dri/r300/compiler/radeon_variable.h

index e2441e97d875ca9468ac4a53d4a8431f4a8721d4..bb6c010e8e3e2959832a008fda0f34ac7104cf52 100644 (file)
@@ -109,8 +109,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
                { 0, 0 }
        };
 
-       struct radeon_program_transformation native_rewrite_r500[] = {
+       struct radeon_program_transformation rewrite_if[] = {
                { &r500_transform_IF, 0 },
+               {0, 0}
+       };
+
+       struct radeon_program_transformation native_rewrite_r500[] = {
                { &radeonTransformALU, 0 },
                { &radeonTransformDeriv, 0 },
                { &radeonTransformTrigScale, 0 },
@@ -135,6 +139,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
                {"emulate branches",            1, !is_r500,    rc_emulate_branches,            NULL},
                {"saturate output writes",      1, sat_out,     rc_local_transform,             saturate_output},
                {"transform TEX",               1, 1,           rc_local_transform,             rewrite_tex},
+               {"transform IF",                1, is_r500,     rc_local_transform,             rewrite_if},
                {"native rewrite",              1, is_r500,     rc_local_transform,             native_rewrite_r500},
                {"native rewrite",              1, !is_r500,    rc_local_transform,             native_rewrite_r300},
                {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_use},
index 5e0be6b8881d80488910a2fde561a4e436c3a8ce..cf99f5e453891a8acbe25ebf24cd73ae601886af 100644 (file)
@@ -30,6 +30,8 @@
 #include <stdio.h>
 
 #include "radeon_compiler_util.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
 #include "../r300_reg.h"
 
 /**
  */
 int r500_transform_IF(
        struct radeon_compiler * c,
-       struct rc_instruction * inst,
-       void* data)
+       struct rc_instruction * inst_if,
+       void *data)
 {
-       struct rc_instruction * inst_mov;
+       struct rc_variable * writer;
+       struct rc_list * writer_list, * list_ptr;
+       struct rc_list * var_list = rc_get_variables(c);
+       unsigned int generic_if = 0;
+       unsigned int alu_chan;
 
-       if (inst->U.I.Opcode != RC_OPCODE_IF)
+       if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
                return 0;
+       }
+
+       writer_list = rc_variable_list_get_writers(
+                       var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
+       if (!writer_list) {
+               generic_if = 1;
+       } else {
+
+               /* Make sure it is safe for the writers to write to
+                * ALU Result */
+               for (list_ptr = writer_list; list_ptr;
+                                               list_ptr = list_ptr->Next) {
+                       struct rc_instruction * inst;
+                       writer = list_ptr->Item;
+                       /* We are going to modify the destination register
+                        * of writer, so if it has a reader other than
+                        * inst_if (aka ReaderCount > 1) we must fall back to
+                        * our generic IF.
+                        * If the writer has a lower IP than inst_if, this
+                        * means that inst_if is above the writer in a loop.
+                        * I'm not sure why this would ever happen, but
+                        * if it does we want to make sure we fall back
+                        * to our generic IF. */
+                       if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
+                               generic_if = 1;
+                               break;
+                       }
+
+                       /* The ALU Result is not preserved across IF
+                        * instructions, so if there is another IF
+                        * instruction between writer and inst_if, then
+                        * we need to fall back to generic IF. */
+                       for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
+                               const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+                               if (info->IsFlowControl) {
+                                       generic_if = 1;
+                                       break;
+                               }
+                       }
+                       if (generic_if) {
+                               break;
+                       }
+               }
+       }
+
+       if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
+               alu_chan = RC_ALURESULT_X;
+       } else {
+               alu_chan = RC_ALURESULT_W;
+       }
+       if (generic_if) {
+               struct rc_instruction * inst_mov =
+                               rc_insert_new_instruction(c, inst_if->Prev);
+
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.DstReg.WriteMask = 0;
+               inst_mov->U.I.DstReg.File = RC_FILE_NONE;
+               inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+               inst_mov->U.I.WriteALUResult = alu_chan;
+               inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+               if (alu_chan == RC_ALURESULT_X) {
+                       inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+                                       inst_mov->U.I.SrcReg[0].Swizzle,
+                                       RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+                                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+               } else {
+                       inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+                                       inst_mov->U.I.SrcReg[0].Swizzle,
+                                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+                                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
+               }
+       } else {
+               rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
+               unsigned int reverse_srcs = 0;
+               unsigned int preserve_opcode = 0;
+               for (list_ptr = writer_list; list_ptr;
+                                               list_ptr = list_ptr->Next) {
+                       writer = list_ptr->Item;
+                       switch(writer->Inst->U.I.Opcode) {
+                       case RC_OPCODE_SEQ:
+                               compare_func = RC_COMPARE_FUNC_EQUAL;
+                               break;
+                       case RC_OPCODE_SNE:
+                               compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+                               break;
+                       case RC_OPCODE_SLE:
+                               reverse_srcs = 1;
+                               /* Fall through */
+                       case RC_OPCODE_SGE:
+                               compare_func = RC_COMPARE_FUNC_GEQUAL;
+                               break;
+                       case RC_OPCODE_SGT:
+                               reverse_srcs = 1;
+                               /* Fall through */
+                       case RC_OPCODE_SLT:
+                               compare_func = RC_COMPARE_FUNC_LESS;
+                               break;
+                       default:
+                               compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+                               preserve_opcode = 1;
+                               break;
+                       }
+                       if (!preserve_opcode) {
+                               writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
+                       }
+                       writer->Inst->U.I.DstReg.WriteMask = 0;
+                       writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
+                       writer->Inst->U.I.WriteALUResult = alu_chan;
+                       writer->Inst->U.I.ALUResultCompare = compare_func;
+                       if (reverse_srcs) {
+                               struct rc_src_register temp_src;
+                               temp_src = writer->Inst->U.I.SrcReg[0];
+                               writer->Inst->U.I.SrcReg[0] =
+                                       writer->Inst->U.I.SrcReg[1];
+                               writer->Inst->U.I.SrcReg[1] = temp_src;
+                       }
+               }
+       }
 
-       inst_mov = rc_insert_new_instruction(c, inst->Prev);
-       inst_mov->U.I.Opcode = RC_OPCODE_MOV;
-       inst_mov->U.I.DstReg.WriteMask = 0;
-       inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
-       inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
-       inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-       inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
-                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
-
-       inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
-       inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
-       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
-       inst->U.I.SrcReg[0].Negate = 0;
+       inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+       inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+       inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
+                               RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+                               RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+       inst_if->U.I.SrcReg[0].Negate = 0;
 
        return 1;
 }
index 1e665e27641ebbfd0d7cc7c6f7e18dcfad3ce49f..6aa448cc6f7f552c03d25de6d516ed7ea0c1b0fa 100644 (file)
@@ -44,7 +44,7 @@ extern struct rc_swizzle_caps r500_swizzle_caps;
 
 extern int r500_transform_IF(
        struct radeon_compiler * c,
-       struct rc_instruction * inst,
+       struct rc_instruction * inst_if,
        void* data);
 
 #endif
index 53ab5fbbbd9bb12ba9a9127747ce9441d4b27c7c..ac73608839e4dfb66a47d0de5eb0356952804ebe 100644 (file)
@@ -517,8 +517,11 @@ static int is_presub_candidate(
 
        assert(inst->U.I.Opcode == RC_OPCODE_ADD);
 
-       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
+       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+                       || inst->U.I.SaturateMode
+                       || inst->U.I.WriteALUResult) {
                return 0;
+       }
 
        /* If both sources use a constant swizzle, then we can't convert it to
         * a presubtract operation.  In fact for the ADD and SUB presubtract
index 5b2295dc7ce4e322478fcfe7d8710aaa00477b27..33181bdcc888c96551312e1f5aad029bece47fe2 100644 (file)
@@ -469,6 +469,67 @@ struct rc_list * rc_variable_readers_union(struct rc_variable * var)
        return list;
 }
 
+static unsigned int reader_equals_src(
+       struct rc_reader reader,
+       unsigned int src_type,
+       void * src)
+{
+       if (reader.Inst->Type != src_type) {
+               return 0;
+       }
+       if (src_type == RC_INSTRUCTION_NORMAL) {
+               return reader.U.I.Src == src;
+       } else {
+               return reader.U.P.Src == src;
+       }
+}
+
+static unsigned int variable_writes_src(
+       struct rc_variable * var,
+       unsigned int src_type,
+       void * src)
+{
+       unsigned int i;
+       for (i = 0; i < var->ReaderCount; i++) {
+               if (reader_equals_src(var->Readers[i], src_type, src)) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+
+struct rc_list * rc_variable_list_get_writers(
+       struct rc_list * var_list,
+       unsigned int src_type,
+       void * src)
+{
+       struct rc_list * list_ptr;
+       struct rc_list * writer_list = NULL;
+       for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
+               struct rc_variable * var = list_ptr->Item;
+               if (variable_writes_src(var, src_type, src)) {
+                       struct rc_variable * friend;
+                       rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
+                       for (friend = var->Friend; friend;
+                                               friend = friend->Friend) {
+                               if (variable_writes_src(friend, src_type, src)) {
+                                       rc_list_add(&writer_list,
+                                               rc_list(&var->C->Pool, friend));
+                               }
+                       }
+                       /* Once we have identified the variable and its
+                        * friends that write this source, we can
+                        * stop searching, because we know none of the
+                        * other variables in the list will write this source.
+                        * If they did they would be friends of var.
+                        */
+                       break;
+               }
+       }
+       return writer_list;
+}
+
 void rc_variable_print(struct rc_variable * var)
 {
        unsigned int i;
index b8fbcaa4029a00e39a3c236090e3959592b9b23d..9427bee18a70e6700fbd7edbd95ccc9b3649fd4f 100644 (file)
@@ -79,6 +79,11 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var);
 
 struct rc_list * rc_variable_readers_union(struct rc_variable * var);
 
+struct rc_list * rc_variable_list_get_writers(
+       struct rc_list * var_list,
+       unsigned int src_type,
+       void * src);
+
 void rc_variable_print(struct rc_variable * var);
 
 #endif /* RADEON_VARIABLE_H */