r300g/compiler/tests: Add an assembly parser
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_optimize.c
index 39dcb21d4f4b6eca191678aab95fc5a6b64d305d..3bbfe898cc2ad599cc43da9ea73a7e61357b5d9a 100644 (file)
@@ -30,7 +30,9 @@
 
 #include "radeon_compiler.h"
 #include "radeon_compiler_util.h"
+#include "radeon_list.h"
 #include "radeon_swizzle.h"
+#include "radeon_variable.h"
 
 struct src_clobbered_reads_cb_data {
        rc_register_file File;
@@ -141,8 +143,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
        unsigned int i;
 
        if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
-           inst_mov->U.I.WriteALUResult ||
-           inst_mov->U.I.SaturateMode)
+           inst_mov->U.I.WriteALUResult)
                return;
 
        /* Get a list of all the readers of this MOV instruction. */
@@ -154,6 +155,22 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
        if (reader_data.Abort || reader_data.ReaderCount == 0)
                return;
 
+       /* We can propagate SaturateMode if all the readers are MOV instructions
+        * without a presubtract operation, source negation, or absolute value.
+        * In that case, we just move SaturateMode to all readers. */
+        if (inst_mov->U.I.SaturateMode) {
+               for (i = 0; i < reader_data.ReaderCount; i++) {
+                       struct rc_instruction * inst = reader_data.Readers[i].Inst;
+
+                       if (inst->U.I.Opcode != RC_OPCODE_MOV ||
+                           inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
+                           inst->U.I.SrcReg[0].Abs ||
+                           inst->U.I.SrcReg[0].Negate) {
+                               return;
+                       }
+               }
+       }
+
        /* Propagate the MOV instruction. */
        for (i = 0; i < reader_data.ReaderCount; i++) {
                struct rc_instruction * inst = reader_data.Readers[i].Inst;
@@ -161,6 +178,8 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
 
                if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
                        inst->U.I.PreSub = inst_mov->U.I.PreSub;
+               if (!inst->U.I.SaturateMode)
+                       inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
        }
 
        /* Finally, remove the original MOV instruction */
@@ -519,7 +538,8 @@ static int is_presub_candidate(
 
        if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
                        || inst->U.I.SaturateMode
-                       || inst->U.I.WriteALUResult) {
+                       || inst->U.I.WriteALUResult
+                       || inst->U.I.Omod) {
                return 0;
        }
 
@@ -658,6 +678,179 @@ static int peephole_add_presub_inv(
        return 0;
 }
 
+struct peephole_mul_cb_data { /* Scan state shared by the omod_filter_*_cb callbacks. */
+       struct rc_dst_register * Writer; /* Destination register the visited instructions are tested against. */
+       unsigned int Clobbered; /* Set to 1 once a visited instruction reads or writes Writer. */
+};
+
+/**
+ * Read callback used while scanning for conflicts: marks the scan as
+ * clobbered when rc_src_reads_dst_mask() reports that the visited source
+ * overlaps the tracked destination register.
+ */
+static void omod_filter_reader_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct peephole_mul_cb_data * scan = userdata;
+       const struct rc_dst_register * dst = scan->Writer;
+
+       if (!rc_src_reads_dst_mask(file, mask, index,
+                       dst->File, dst->Index, dst->WriteMask)) {
+               return;
+       }
+
+       scan->Clobbered = 1;
+}
+
+/**
+ * Write callback used while scanning for conflicts: marks the scan as
+ * clobbered when the visited write targets the same register file and index
+ * as the tracked destination and shares at least one write-mask bit with it.
+ */
+static void omod_filter_writer_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct peephole_mul_cb_data * scan = userdata;
+       const struct rc_dst_register * dst = scan->Writer;
+
+       /* A different register can never conflict. */
+       if (file != dst->File || index != dst->Index) {
+               return;
+       }
+
+       if (mask & dst->WriteMask) {
+               scan->Clobbered = 1;
+       }
+}
+
+/**
+ * Try to fold a MUL by an immediate 2, 4, 8, 1/2, 1/4 or 1/8 into the output
+ * modifier (Omod) of the instruction(s) that write its temporary operand.
+ *
+ * The MUL must read exactly one temporary and one constant source. The
+ * constant must be a non-negated immediate whose used channels all select the
+ * same swizzle. On success every writer is retargeted to the MUL's destination,
+ * takes over the MUL's SaturateMode, and the MUL is removed.
+ *
+ * The transformation is refused when:
+ *  - the temporary is read with Negate or Abs (Omod cannot express those),
+ *  - a writer is a texture instruction, saturates, or already carries an
+ *    Omod (overwriting it would silently drop the earlier scale),
+ *  - an instruction between a writer and the MUL reads or writes the MUL's
+ *    destination register.
+ *
+ * NOTE(review): a non-identity swizzle on the temporary source is not
+ * rejected here; presumably rc_make_conversion_swizzle() only accounts for
+ * the write masks -- confirm against its definition.
+ *
+ * @param c         compiler context, used to look up the immediate value.
+ * @param inst_mul  candidate MUL instruction.
+ * @param var_list  variable list passed to
+ *                  rc_variable_list_get_writers_one_reader().
+ * @return
+ *     0 if inst_mul is still part of the program.
+ *     1 if inst_mul has been removed.
+ */
+static int peephole_mul_omod(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_mul,
+       struct rc_list * var_list)
+{
+       unsigned int chan = 0, swz, i;
+       int const_index = -1;
+       int temp_index = -1;
+       float const_value;
+       rc_omod_op omod_op = RC_OMOD_DISABLE;
+       struct rc_list * writer_list;
+       struct rc_variable * var;
+       struct peephole_mul_cb_data cb_data;
+       unsigned writemask_sum;
+
+       /* Classify the two sources: exactly one temporary, one constant. */
+       for (i = 0; i < 2; i++) {
+               unsigned int j;
+               if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
+                       && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
+                       return 0;
+               }
+               if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                       if (temp_index != -1) {
+                               /* The instruction has two temp sources */
+                               return 0;
+                       }
+                       /* Omod only scales the writer's result; a negated or
+                        * absolute-valued read of the temporary would be
+                        * lost when the MUL is removed. */
+                       if (inst_mul->U.I.SrcReg[i].Negate
+                               || inst_mul->U.I.SrcReg[i].Abs) {
+                               return 0;
+                       }
+                       temp_index = i;
+                       continue;
+               }
+               /* If we get this far Src[i] must be a constant src */
+               if (inst_mul->U.I.SrcReg[i].Negate) {
+                       return 0;
+               }
+               /* The constant src needs to read from the same swizzle */
+               swz = RC_SWIZZLE_UNUSED;
+               chan = 0;
+               for (j = 0; j < 4; j++) {
+                       unsigned int j_swz =
+                               GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
+                       if (j_swz == RC_SWIZZLE_UNUSED) {
+                               continue;
+                       }
+                       if (swz == RC_SWIZZLE_UNUSED) {
+                               swz = j_swz;
+                               chan = j;
+                       } else if (j_swz != swz) {
+                               return 0;
+                       }
+               }
+
+               if (const_index != -1) {
+                       /* The instruction has two constant sources */
+                       return 0;
+               } else {
+                       const_index = i;
+               }
+       }
+
+       /* Both indices are set here: the loop above returns on any other
+        * combination of source files. */
+       if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
+                               inst_mul->U.I.SrcReg[const_index].Index)) {
+               return 0;
+       }
+       const_value = rc_get_constant_value(c,
+                       inst_mul->U.I.SrcReg[const_index].Index,
+                       inst_mul->U.I.SrcReg[const_index].Swizzle,
+                       inst_mul->U.I.SrcReg[const_index].Negate,
+                       chan);
+
+       /* Map the immediate onto one of the supported output modifiers. */
+       if (const_value == 2.0f) {
+               omod_op = RC_OMOD_MUL_2;
+       } else if (const_value == 4.0f) {
+               omod_op = RC_OMOD_MUL_4;
+       } else if (const_value == 8.0f) {
+               omod_op = RC_OMOD_MUL_8;
+       } else if (const_value == (1.0f / 2.0f)) {
+               omod_op = RC_OMOD_DIV_2;
+       } else if (const_value == (1.0f / 4.0f)) {
+               omod_op = RC_OMOD_DIV_4;
+       } else if (const_value == (1.0f / 8.0f)) {
+               omod_op = RC_OMOD_DIV_8;
+       } else {
+               return 0;
+       }
+
+       writer_list = rc_variable_list_get_writers_one_reader(var_list,
+               RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
+
+       if (!writer_list) {
+               return 0;
+       }
+
+       /* Validate every writer before touching any of them. */
+       cb_data.Clobbered = 0;
+       cb_data.Writer = &inst_mul->U.I.DstReg;
+       for (var = writer_list->Item; var; var = var->Friend) {
+               struct rc_instruction * inst;
+               const struct rc_opcode_info * info = rc_get_opcode_info(
+                               var->Inst->U.I.Opcode);
+               if (info->HasTexture) {
+                       return 0;
+               }
+               if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
+                       return 0;
+               }
+               /* A writer that already has an output modifier (e.g. from an
+                * earlier fold) cannot take a second one; overwriting it
+                * would drop the first scale factor. */
+               if (var->Inst->U.I.Omod != RC_OMOD_DISABLE) {
+                       return 0;
+               }
+               /* The MUL's destination must be untouched between the writer
+                * and the MUL, since the writer will now produce it early. */
+               for (inst = inst_mul->Prev; inst != var->Inst;
+                                                       inst = inst->Prev) {
+                       rc_for_all_reads_mask(inst, omod_filter_reader_cb,
+                                                               &cb_data);
+                       rc_for_all_writes_mask(inst, omod_filter_writer_cb,
+                                                               &cb_data);
+                       if (cb_data.Clobbered) {
+                               return 0;
+                       }
+               }
+       }
+
+       /* Rewrite the instructions */
+       writemask_sum = rc_variable_writemask_sum(writer_list->Item);
+       for (var = writer_list->Item; var; var = var->Friend) {
+               unsigned conversion_swizzle = rc_make_conversion_swizzle(
+                                       writemask_sum,
+                                       inst_mul->U.I.DstReg.WriteMask);
+               var->Inst->U.I.Omod = omod_op;
+               var->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
+               var->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
+               rc_normal_rewrite_writemask(var->Inst, conversion_swizzle);
+               var->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
+       }
+
+       rc_remove_instruction(inst_mul);
+
+       return 1;
+}
+
 /**
  * @return
  *     0 if inst is still part of the program.
@@ -683,6 +876,7 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
 void rc_optimize(struct radeon_compiler * c, void *user)
 {
        struct rc_instruction * inst = c->Program.Instructions.Next;
+       struct rc_list * var_list;
        while(inst != &c->Program.Instructions) {
                struct rc_instruction * cur = inst;
                inst = inst->Next;
@@ -697,4 +891,18 @@ void rc_optimize(struct radeon_compiler * c, void *user)
                        /* cur may no longer be part of the program */
                }
        }
+
+       if (!c->has_omod) {
+               return;
+       }
+
+       inst = c->Program.Instructions.Next;
+       while(inst != &c->Program.Instructions) {
+               struct rc_instruction * cur = inst;
+               inst = inst->Next;
+               if (cur->U.I.Opcode == RC_OPCODE_MUL) {
+                       var_list = rc_get_variables(c);
+                       peephole_mul_omod(c, cur, var_list);
+               }
+       }
 }