i965: Move intel_context::perf_debug to brw_context.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 9a764089a64a90ee732d24908ade8ac491d357b8..65cb8ab8a9ed1958a8a3a2aefdfa23bc79a10b5a 100644 (file)
@@ -48,7 +48,6 @@ extern "C" {
 }
 #include "brw_fs.h"
 #include "glsl/glsl_types.h"
-#include "glsl/ir_print_visitor.h"
 
 void
 fs_inst::init()
@@ -173,6 +172,13 @@ ALU2(SHL)
 ALU2(SHR)
 ALU2(ASR)
 ALU3(LRP)
+ALU1(BFREV)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(CBIT)
 
 /** Gen4 predicated IF. */
 fs_inst *
@@ -218,7 +224,7 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition)
     */
    if (intel->gen == 4) {
       dst.type = src0.type;
-      if (dst.file == FIXED_HW_REG)
+      if (dst.file == HW_REG)
         dst.fixed_hw_reg.type = dst.type;
    }
 
@@ -405,7 +411,7 @@ fs_reg::fs_reg(uint32_t u)
 fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
 {
    init();
-   this->file = FIXED_HW_REG;
+   this->file = HW_REG;
    this->fixed_hw_reg = fixed_hw_reg;
    this->type = fixed_hw_reg.type;
 }
@@ -811,6 +817,7 @@ fs_visitor::import_uniforms(fs_visitor *v)
                           import_uniforms_callback,
                           variable_ht);
    this->params_remap = v->params_remap;
+   this->nr_params_remap = v->nr_params_remap;
 }
 
 /* Our support for uniforms is piggy-backed on the struct
@@ -1212,7 +1219,7 @@ fs_visitor::assign_curb_setup()
                                                  constant_nr / 8,
                                                  constant_nr % 8);
 
-           inst->src[i].file = FIXED_HW_REG;
+           inst->src[i].file = HW_REG;
            inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
         }
       }
@@ -1280,12 +1287,12 @@ fs_visitor::assign_urb_setup()
       fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == FS_OPCODE_LINTERP) {
-        assert(inst->src[2].file == FIXED_HW_REG);
+        assert(inst->src[2].file == HW_REG);
         inst->src[2].fixed_hw_reg.nr += urb_start;
       }
 
       if (inst->opcode == FS_OPCODE_CINTERP) {
-        assert(inst->src[0].file == FIXED_HW_REG);
+        assert(inst->src[0].file == HW_REG);
         inst->src[0].fixed_hw_reg.nr += urb_start;
       }
    }
@@ -1449,8 +1456,8 @@ fs_visitor::compact_virtual_grfs()
          remap_table[i] = new_index;
          virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
          if (live_intervals_valid) {
-            virtual_grf_use[new_index] = virtual_grf_use[i];
-            virtual_grf_def[new_index] = virtual_grf_def[i];
+            virtual_grf_start[new_index] = virtual_grf_start[i];
+            virtual_grf_end[new_index] = virtual_grf_end[i];
          }
          ++new_index;
       }
@@ -1483,6 +1490,7 @@ fs_visitor::remove_dead_constants()
 {
    if (dispatch_width == 8) {
       this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+      this->nr_params_remap = c->prog_data.nr_params;
 
       for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
         this->params_remap[i] = -1;
@@ -1497,7 +1505,14 @@ fs_visitor::remove_dead_constants()
            if (inst->src[i].file != UNIFORM)
               continue;
 
-           assert(constant_nr < (int)c->prog_data.nr_params);
+           /* Section 5.11 of the OpenGL 4.3 spec says:
+            *
+            *     "Out-of-bounds reads return undefined values, which include
+            *     values from other variables of the active program or zero."
+            */
+           if (constant_nr < 0 || constant_nr >= (int)c->prog_data.nr_params) {
+              constant_nr = 0;
+           }
 
            /* For now, set this to non-negative.  We'll give it the
             * actual new number in a moment, in order to keep the
@@ -1545,6 +1560,10 @@ fs_visitor::remove_dead_constants()
         if (inst->src[i].file != UNIFORM)
            continue;
 
+        /* As above: alias an out-of-range constant index to 0. */
+        if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) {
+           constant_nr = 0;
+        }
         assert(this->params_remap[constant_nr] != -1);
         inst->src[i].reg = this->params_remap[constant_nr];
         inst->src[i].reg_offset = 0;
@@ -1764,10 +1783,8 @@ fs_visitor::opt_algebraic()
 }
 
 /**
- * Must be called after calculate_live_intervales() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
+ * Removes any instructions writing a VGRF where that VGRF is not used by any
+ * later instruction.
  */
 bool
 fs_visitor::dead_code_eliminate()
@@ -1780,9 +1797,12 @@ fs_visitor::dead_code_eliminate()
    foreach_list_safe(node, &this->instructions) {
       fs_inst *inst = (fs_inst *)node;
 
-      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
-        inst->remove();
-        progress = true;
+      if (inst->dst.file == GRF) {
+         assert(this->virtual_grf_end[inst->dst.reg] >= pc);
+         if (this->virtual_grf_end[inst->dst.reg] == pc) {
+            inst->remove();
+            progress = true;
+         }
       }
 
       pc++;
@@ -2194,7 +2214,7 @@ fs_visitor::compute_to_mrf()
       /* Can't compute-to-MRF this GRF if someone else was going to
        * read it later.
        */
-      if (this->virtual_grf_use[inst->src[0].reg] > ip)
+      if (this->virtual_grf_end[inst->src[0].reg] > ip)
         continue;
 
       /* Found a move of a GRF to a MRF.  Let's see if we can go
@@ -2402,7 +2422,7 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
       int grf;
       if (inst->src[i].file == GRF) {
          grf = inst->src[i].reg;
-      } else if (inst->src[i].file == FIXED_HW_REG &&
+      } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
          grf = inst->src[i].fixed_hw_reg.nr;
       } else {
@@ -3000,9 +3020,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    bool start_busy = false;
    float start_time = 0;
 
-   if (unlikely(intel->perf_debug)) {
-      start_busy = (intel->batch.last_bo &&
-                    drm_intel_bo_busy(intel->batch.last_bo));
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
       start_time = get_time();
    }
 
@@ -3057,12 +3077,12 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
                                                    simd16_instructions,
                                                    final_assembly_size);
 
-   if (unlikely(intel->perf_debug) && shader) {
+   if (unlikely(brw->perf_debug) && shader) {
       if (shader->compiled_once)
          brw_wm_debug_recompile(brw, prog, &c->key);
       shader->compiled_once = true;
 
-      if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) {
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
          perf_debug("FS compile took %.03f ms and stalled the GPU\n",
                     (get_time() - start_time) * 1000);
       }