i965/fs: Add support for translating ir_triop_fma into MAD.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_schedule_instructions.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index 6a52754455341a4c9bc256fbfc4ce2ae266b164f..5530683df84f1dd8cf79e34fa98bdfd984952eb0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -29,7 +29,6 @@
  #include "brw_vec4.h"
  #include "glsl/glsl_types.h"
  #include "glsl/ir_optimization.h"
-#include "glsl/ir_print_visitor.h"
  
  using namespace brw;
  
@@ -60,7 +59,7 @@ static bool debug = false;
  class schedule_node : public exec_node
  {
  public:
-   schedule_node(backend_instruction *inst, const struct intel_context *intel)
+   schedule_node(backend_instruction *inst, const struct brw_context *brw)
     {
        this->inst = inst;
        this->child_array_size = 0;
@@ -73,8 +72,8 @@ public:
        /* We can't measure Gen6 timings directly but expect them to be much
         * closer to Gen7 than Gen4.
         */
-      if (intel->gen >= 6)
-         set_latency_gen7(intel->is_haswell);
+      if (brw->gen >= 6)
+         set_latency_gen7(brw->is_haswell);
        else
           set_latency_gen4();
     }
@@ -429,7 +428,7 @@ vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
  void
  instruction_scheduler::add_inst(backend_instruction *inst)
  {
-   schedule_node *n = new(mem_ctx) schedule_node(inst, bv->intel);
+   schedule_node *n = new(mem_ctx) schedule_node(inst, bv->brw);
  
     assert(!inst->is_head_sentinel());
     assert(!inst->is_tail_sentinel());
@@ -579,7 +578,10 @@ fs_instruction_scheduler::calculate_deps()
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
-               for (int r = 0; r < reg_width; r++)
+               int size = reg_width;
+               if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
+                  size = 1;
+               for (int r = 0; r < size; r++)
                    add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
              } else {
                 add_dep(last_fixed_grf_write, n);
@@ -684,7 +686,10 @@ fs_instruction_scheduler::calculate_deps()
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
-               for (int r = 0; r < reg_width; r++)
+               int size = reg_width;
+               if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
+                  size = 1;
+               for (int r = 0; r < size; r++)
                    add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
              } else {
                 add_dep(n, last_fixed_grf_write);
@@ -816,7 +821,7 @@ vec4_instruction_scheduler::calculate_deps()
           add_dep(last_mrf_write[inst->base_mrf + i], n);
        }
  
-      if (inst->predicate) {
+      if (inst->depends_on_flags()) {
           assert(last_conditional_mod);
           add_dep(last_conditional_mod, n);
        }
@@ -887,7 +892,7 @@ vec4_instruction_scheduler::calculate_deps()
           add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
        }
  
-      if (inst->predicate) {
+      if (inst->depends_on_flags()) {
           add_dep(n, last_conditional_mod);
        }