mesa: add/update comments in _mesa_copy_buffer_subdata()
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_schedule_instructions.cpp
index ed88aa689d04e50efcd92e745dc48e12a754909f..910f3297d27d14a34f16280356478319cc2e9858 100644 (file)
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_optimization.h"
-#include "../glsl/ir_print_visitor.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/ir_print_visitor.h"
 
 /** @file brw_fs_schedule_instructions.cpp
  *
@@ -84,26 +69,28 @@ public:
       int math_latency = 22;
 
       switch (inst->opcode) {
-      case FS_OPCODE_RCP:
+      case SHADER_OPCODE_RCP:
         this->latency = 1 * chans * math_latency;
         break;
-      case FS_OPCODE_RSQ:
+      case SHADER_OPCODE_RSQ:
         this->latency = 2 * chans * math_latency;
         break;
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_LOG2:
+      case SHADER_OPCODE_INT_QUOTIENT:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_LOG2:
         /* full precision log.  partial is 2. */
         this->latency = 3 * chans * math_latency;
         break;
-      case FS_OPCODE_EXP2:
+      case SHADER_OPCODE_INT_REMAINDER:
+      case SHADER_OPCODE_EXP2:
         /* full precision.  partial is 3, same throughput. */
         this->latency = 4 * chans * math_latency;
         break;
-      case FS_OPCODE_POW:
+      case SHADER_OPCODE_POW:
         this->latency = 8 * chans * math_latency;
         break;
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
         /* minimum latency, max is 12 rounds. */
         this->latency = 5 * chans * math_latency;
         break;
@@ -263,6 +250,12 @@ instruction_scheduler::calculate_deps()
    schedule_node *last_grf_write[virtual_grf_count];
    schedule_node *last_mrf_write[BRW_MAX_MRF];
    schedule_node *last_conditional_mod = NULL;
+   /* Fixed HW registers are assumed to be separate from the virtual
+    * GRFs, so they can be tracked separately.  We don't really write
+    * to fixed GRFs much, so don't bother tracking them on a more
+    * granular level.
+    */
+   schedule_node *last_fixed_grf_write = NULL;
 
    /* The last instruction always needs to still be the last
     * instruction.  Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -277,14 +270,18 @@ instruction_scheduler::calculate_deps()
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
    /* top-to-bottom dependencies: RAW and WAW. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       fs_inst *inst = n->inst;
 
       /* read-after-write deps. */
       for (int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF) {
            add_dep(last_grf_write[inst->src[i].reg], n);
+        } else if (inst->src[i].file == FIXED_HW_REG &&
+                   (inst->src[i].fixed_hw_reg.file ==
+                    BRW_GENERAL_REGISTER_FILE)) {
+           add_dep(last_fixed_grf_write, n);
         } else if (inst->src[i].file != BAD_FILE &&
                    inst->src[i].file != IMM &&
                    inst->src[i].file != UNIFORM) {
@@ -311,18 +308,21 @@ instruction_scheduler::calculate_deps()
         add_dep(last_grf_write[inst->dst.reg], n);
         last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-        int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+        int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
         add_dep(last_mrf_write[reg], n);
         last_mrf_write[reg] = n;
         if (is_compressed(inst)) {
-           if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+           if (inst->dst.reg & BRW_MRF_COMPR4)
               reg += 4;
            else
               reg++;
            add_dep(last_mrf_write[reg], n);
            last_mrf_write[reg] = n;
         }
+      } else if (inst->dst.file == FIXED_HW_REG &&
+                inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+        last_fixed_grf_write = n;
       } else if (inst->dst.file != BAD_FILE) {
         add_barrier_deps(n);
       }
@@ -344,6 +344,7 @@ instruction_scheduler::calculate_deps()
    memset(last_grf_write, 0, sizeof(last_grf_write));
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
    last_conditional_mod = NULL;
+   last_fixed_grf_write = NULL;
 
    exec_node *node;
    exec_node *prev;
@@ -357,6 +358,10 @@ instruction_scheduler::calculate_deps()
       for (int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF) {
            add_dep(n, last_grf_write[inst->src[i].reg]);
+        } else if (inst->src[i].file == FIXED_HW_REG &&
+                   (inst->src[i].fixed_hw_reg.file ==
+                    BRW_GENERAL_REGISTER_FILE)) {
+           add_dep(n, last_fixed_grf_write);
         } else if (inst->src[i].file != BAD_FILE &&
                    inst->src[i].file != IMM &&
                    inst->src[i].file != UNIFORM) {
@@ -383,18 +388,21 @@ instruction_scheduler::calculate_deps()
       if (inst->dst.file == GRF) {
         last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-        int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+        int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
         last_mrf_write[reg] = n;
 
         if (is_compressed(inst)) {
-           if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+           if (inst->dst.reg & BRW_MRF_COMPR4)
               reg += 4;
            else
               reg++;
 
            last_mrf_write[reg] = n;
         }
+      } else if (inst->dst.file == FIXED_HW_REG &&
+                inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+        last_fixed_grf_write = n;
       } else if (inst->dst.file != BAD_FILE) {
         add_barrier_deps(n);
       }
@@ -416,8 +424,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
    int time = 0;
 
    /* Remove non-DAG heads from the list. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list_safe(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       if (n->parent_count != 0)
         n->remove();
    }
@@ -426,8 +434,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
       schedule_node *chosen = NULL;
       int chosen_time = 0;
 
-      foreach_iter(exec_list_iterator, iter, instructions) {
-        schedule_node *n = (schedule_node *)iter.get();
+      foreach_list(node, &instructions) {
+        schedule_node *n = (schedule_node *)node;
 
         if (!chosen || n->unblocked_time < chosen_time) {
            chosen = n;
@@ -469,8 +477,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
        * progress until the first is done.
        */
       if (chosen->inst->is_math()) {
-        foreach_iter(exec_list_iterator, iter, instructions) {
-           schedule_node *n = (schedule_node *)iter.get();
+        foreach_list(node, &instructions) {
+           schedule_node *n = (schedule_node *)node;
 
            if (n->inst->is_math())
               n->unblocked_time = MAX2(n->unblocked_time,