i965/vs: Add a function for how many MRFs get written as part of a SEND.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 15475fbae2f9cc90ca4ba9eb62aa7c82b06bd490..9a89f88b61fe8aef1100cbcae246a92628f8efa3 100644 (file)
@@ -46,8 +46,8 @@ extern "C" {
 }
 #include "brw_shader.h"
 #include "brw_fs.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_print_visitor.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_print_visitor.h"
 
 #define MAX_INSTRUCTION (1 << 30)
 
@@ -143,20 +143,22 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 0;
 
    switch (inst->opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       return 1 * c->dispatch_width / 8;
-   case FS_OPCODE_POW:
+   case SHADER_OPCODE_POW:
       return 2 * c->dispatch_width / 8;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
    case FS_OPCODE_TXD:
+   case FS_OPCODE_TXF:
    case FS_OPCODE_TXL:
+   case FS_OPCODE_TXS:
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
@@ -181,29 +183,26 @@ fs_visitor::virtual_grf_alloc(int size)
         virtual_grf_array_size *= 2;
       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
-
-      /* This slot is always unused. */
-      virtual_grf_sizes[0] = 0;
    }
    virtual_grf_sizes[virtual_grf_next] = size;
    return virtual_grf_next++;
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = BRW_REGISTER_TYPE_F;
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = type;
 }
 
@@ -242,11 +241,12 @@ import_uniforms_callback(const void *key,
  * This brings in those uniform definitions
  */
 void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
 {
-   hash_table_call_foreach(src_variable_ht,
+   hash_table_call_foreach(v->variable_ht,
                           import_uniforms_callback,
                           variable_ht);
+   this->params_remap = v->params_remap;
 }
 
 /* Our support for uniforms is piggy-backed on the struct
@@ -281,23 +281,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
 
         assert(param < ARRAY_SIZE(c->prog_data.param));
 
-        switch (type->base_type) {
-        case GLSL_TYPE_FLOAT:
+        if (ctx->Const.NativeIntegers) {
            c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-           break;
-        case GLSL_TYPE_UINT:
-           c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
-           break;
-        case GLSL_TYPE_INT:
-           c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
-           break;
-        case GLSL_TYPE_BOOL:
-           c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
-           break;
-        default:
-           assert(!"not reached");
-           c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-           break;
+        } else {
+           switch (type->base_type) {
+           case GLSL_TYPE_FLOAT:
+              c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+              break;
+           case GLSL_TYPE_UINT:
+              c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+              break;
+           case GLSL_TYPE_INT:
+              c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+              break;
+           case GLSL_TYPE_BOOL:
+              c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+              break;
+           default:
+              assert(!"not reached");
+              c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+              break;
+           }
         }
         this->param_index[param] = loc;
         this->param_offset[param] = i;
@@ -463,9 +467,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
         } else {
            /* Perspective interpolation case. */
            for (unsigned int k = 0; k < type->vector_elements; k++) {
-              struct brw_reg interp = interp_reg(location, k);
-              emit(FS_OPCODE_LINTERP, attr,
-                   this->delta_x, this->delta_y, fs_reg(interp));
+              /* FINISHME: At some point we probably want to push
+               * this farther by giving similar treatment to the
+               * other potentially constant components of the
+               * attribute, as well as making brw_vs_constval.c
+               * handle varyings other than gl_TexCoord.
+               */
+              if (location >= FRAG_ATTRIB_TEX0 &&
+                  location <= FRAG_ATTRIB_TEX7 &&
+                  k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+                 emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+              } else {
+                 struct brw_reg interp = interp_reg(location, k);
+                 emit(FS_OPCODE_LINTERP, attr,
+                      this->delta_x, this->delta_y, fs_reg(interp));
+              }
               attr.reg_offset++;
            }
 
@@ -512,16 +528,16 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
 {
    switch (opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       break;
    default:
       assert(!"not reached: bad math opcode");
@@ -555,12 +571,12 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
 {
    int base_mrf = 2;
    fs_inst *inst;
 
-   assert(opcode == FS_OPCODE_POW);
+   assert(opcode == SHADER_OPCODE_POW);
 
    if (intel->gen >= 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -605,7 +621,7 @@ fs_visitor::setup_paramvalues_refs()
    /* Set up the pointers to ParamValues now that that array is finalized. */
    for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
       c->prog_data.param[i] =
-        fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+        (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
         this->param_offset[i];
    }
 }
@@ -626,7 +642,7 @@ fs_visitor::assign_curb_setup()
 
       for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
-           int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+           int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
                                                  constant_nr / 8,
                                                  constant_nr % 8);
@@ -657,14 +673,7 @@ fs_visitor::calculate_urb_setup()
       /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
       for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
         if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
-           int fp_index;
-
-           if (i >= VERT_RESULT_VAR0)
-              fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
-           else if (i <= VERT_RESULT_TEX7)
-              fp_index = i;
-           else
-              fp_index = -1;
+           int fp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i);
 
            if (fp_index >= 0)
               urb_setup[fp_index] = urb_next++;
@@ -786,6 +795,86 @@ fs_visitor::split_virtual_grfs()
    this->live_intervals_valid = false;
 }
 
+bool
+fs_visitor::remove_dead_constants()
+{
+   if (c->dispatch_width == 8) {
+      this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+        this->params_remap[i] = -1;
+
+      /* Find which params are still in use. */
+      foreach_list(node, &this->instructions) {
+        fs_inst *inst = (fs_inst *)node;
+
+        for (int i = 0; i < 3; i++) {
+           int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+           if (inst->src[i].file != UNIFORM)
+              continue;
+
+           assert(constant_nr < (int)c->prog_data.nr_params);
+
+           /* For now, set this to non-negative.  We'll give it the
+            * actual new number in a moment, in order to keep the
+            * register numbers nicely ordered.
+            */
+           this->params_remap[constant_nr] = 0;
+        }
+      }
+
+      /* Figure out what the new numbers for the params will be.  At some
+       * point when we're doing uniform array access, we're going to want
+       * to keep the distinction between .reg and .reg_offset, but for
+       * now we don't care.
+       */
+      unsigned int new_nr_params = 0;
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+        if (this->params_remap[i] != -1) {
+           this->params_remap[i] = new_nr_params++;
+        }
+      }
+
+      /* Update the list of params to be uploaded to match our new numbering. */
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+        int remapped = this->params_remap[i];
+
+        if (remapped == -1)
+           continue;
+
+        /* We've already done setup_paramvalues_refs() so no need to worry
+         * about param_index and param_offset.
+         */
+        c->prog_data.param[remapped] = c->prog_data.param[i];
+        c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+      }
+
+      c->prog_data.nr_params = new_nr_params;
+   } else {
+      /* This should have been generated in the 8-wide pass already. */
+      assert(this->params_remap);
+   }
+
+   /* Now do the renumbering of the shader to remove unused params. */
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      for (int i = 0; i < 3; i++) {
+        int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+        if (inst->src[i].file != UNIFORM)
+           continue;
+
+        assert(this->params_remap[constant_nr] != -1);
+        inst->src[i].reg = this->params_remap[constant_nr];
+        inst->src[i].reg_offset = 0;
+      }
+   }
+
+   return true;
+}
+
 /**
  * Choose accesses from the UNIFORM file to demote to using the pull
  * constant buffer.
@@ -822,7 +911,7 @@ fs_visitor::setup_pull_constants()
         if (inst->src[i].file != UNIFORM)
            continue;
 
-        int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+        int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
         if (uniform_nr < pull_uniform_base)
            continue;
 
@@ -892,7 +981,7 @@ fs_visitor::calculate_live_intervals()
         }
       } else {
         for (unsigned int i = 0; i < 3; i++) {
-           if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+           if (inst->src[i].file == GRF) {
               int reg = inst->src[i].reg;
 
               if (!loop_depth) {
@@ -908,7 +997,7 @@ fs_visitor::calculate_live_intervals()
               }
            }
         }
-        if (inst->dst.file == GRF && inst->dst.reg != 0) {
+        if (inst->dst.file == GRF) {
            int reg = inst->dst.reg;
 
            if (!loop_depth) {
@@ -1044,6 +1133,24 @@ fs_visitor::propagate_constants()
                  progress = true;
               }
               break;
+
+           case SHADER_OPCODE_RCP:
+              /* The hardware doesn't do math on immediate values
+               * (because why are you doing that, seriously?), but
+               * the correct answer is to just constant fold it
+               * anyway.
+               */
+              assert(i == 0);
+              if (inst->src[0].imm.f != 0.0f) {
+                 scan_inst->opcode = BRW_OPCODE_MOV;
+                 scan_inst->src[0] = inst->src[0];
+                 scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
+                 progress = true;
+              }
+              break;
+
+           default:
+              break;
            }
         }
 
@@ -1061,6 +1168,49 @@ fs_visitor::propagate_constants()
 
    return progress;
 }
+
+
+/**
+ * Attempts to move immediate constants into the immediate
+ * constant slot of following instructions.
+ *
+ * Immediate constants are a bit tricky -- they have to be in the last
+ * operand slot, you can't do abs/negate on them,
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MUL:
+        if (inst->src[1].file != IMM)
+           continue;
+
+        /* a * 1.0 = a */
+        if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+            inst->src[1].imm.f == 1.0) {
+           inst->opcode = BRW_OPCODE_MOV;
+           inst->src[1] = reg_undef;
+           progress = true;
+           break;
+        }
+
+        break;
+      default:
+        break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Must be called after calculate_live_intervales() to remove unused
  * writes to registers -- register allocation will fail otherwise
@@ -1121,6 +1271,8 @@ fs_visitor::register_coalesce()
       case BRW_OPCODE_ENDIF:
         if_depth--;
         break;
+      default:
+        break;
       }
       if (loop_depth || if_depth)
         continue;
@@ -1128,7 +1280,8 @@ fs_visitor::register_coalesce()
       if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->saturate ||
-         inst->dst.file != GRF || inst->src[0].file != GRF ||
+         inst->dst.file != GRF || (inst->src[0].file != GRF &&
+                                   inst->src[0].file != UNIFORM)||
          inst->dst.type != inst->src[0].type)
         continue;
 
@@ -1150,7 +1303,8 @@ fs_visitor::register_coalesce()
               interfered = true;
               break;
            }
-           if (scan_inst->dst.reg == inst->src[0].reg &&
+           if (inst->src[0].file == GRF &&
+               scan_inst->dst.reg == inst->src[0].reg &&
                (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
                 scan_inst->is_tex())) {
               interfered = true;
@@ -1158,10 +1312,13 @@ fs_visitor::register_coalesce()
            }
         }
 
-        /* The gen6 MATH instruction can't handle source modifiers, so avoid
-         * coalescing those for now.  We should do something more specific.
+        /* The gen6 MATH instruction can't handle source modifiers or
+         * unusual register regions, so avoid coalescing those for
+         * now.  We should do something more specific.
          */
-        if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+        if (intel->gen >= 6 &&
+            scan_inst->is_math() &&
+            (has_source_modifiers || inst->src[0].file == UNIFORM)) {
            interfered = true;
            break;
         }
@@ -1180,11 +1337,10 @@ fs_visitor::register_coalesce()
            if (scan_inst->src[i].file == GRF &&
                scan_inst->src[i].reg == inst->dst.reg &&
                scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
-              scan_inst->src[i].reg = inst->src[0].reg;
-              scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
-              scan_inst->src[i].abs |= inst->src[0].abs;
-              scan_inst->src[i].negate ^= inst->src[0].negate;
-              scan_inst->src[i].smear = inst->src[0].smear;
+              fs_reg new_src = inst->src[0];
+              new_src.negate ^= scan_inst->src[i].negate;
+              new_src.abs |= scan_inst->src[i].abs;
+              scan_inst->src[i] = new_src;
            }
         }
       }
@@ -1224,9 +1380,9 @@ fs_visitor::compute_to_mrf()
       /* Work out which hardware MRF registers are written by this
        * instruction.
        */
-      int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+      int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
       int mrf_high;
-      if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+      if (inst->dst.reg & BRW_MRF_COMPR4) {
         mrf_high = mrf_low + 4;
       } else if (c->dispatch_width == 16 &&
                 (!inst->force_uncompressed && !inst->force_sechalf)) {
@@ -1293,7 +1449,7 @@ fs_visitor::compute_to_mrf()
            if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
               /* Found the creator of our MRF's source value. */
               scan_inst->dst.file = MRF;
-              scan_inst->dst.hw_reg = inst->dst.hw_reg;
+              scan_inst->dst.reg = inst->dst.reg;
               scan_inst->saturate |= inst->saturate;
               inst->remove();
               progress = true;
@@ -1330,10 +1486,10 @@ fs_visitor::compute_to_mrf()
            /* If somebody else writes our MRF here, we can't
             * compute-to-MRF before that.
             */
-           int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+           int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
            int scan_mrf_high;
 
-           if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+           if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
               scan_mrf_high = scan_mrf_low + 4;
            } else if (c->dispatch_width == 16 &&
                       (!scan_inst->force_uncompressed &&
@@ -1405,7 +1561,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       if (inst->opcode == BRW_OPCODE_MOV &&
          inst->dst.file == MRF) {
-        fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+        fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
         if (prev_inst && inst->equals(prev_inst)) {
            inst->remove();
            progress = true;
@@ -1415,7 +1571,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       /* Clear out the last-write records for MRFs that were overwritten. */
       if (inst->dst.file == MRF) {
-        last_mrf_move[inst->dst.hw_reg] = NULL;
+        last_mrf_move[inst->dst.reg] = NULL;
       }
 
       if (inst->mlen > 0) {
@@ -1441,7 +1597,7 @@ fs_visitor::remove_duplicate_mrf_writes()
          inst->dst.file == MRF &&
          inst->src[0].file == GRF &&
          !inst->predicated) {
-        last_mrf_move[inst->dst.hw_reg] = inst;
+        last_mrf_move[inst->dst.reg] = inst;
       }
    }
 
@@ -1546,11 +1702,14 @@ fs_visitor::run()
         progress = remove_duplicate_mrf_writes() || progress;
 
         progress = propagate_constants() || progress;
+        progress = opt_algebraic() || progress;
         progress = register_coalesce() || progress;
         progress = compute_to_mrf() || progress;
         progress = dead_code_eliminate() || progress;
       } while (progress);
 
+      remove_dead_constants();
+
       schedule_instructions();
 
       assign_curb_setup();
@@ -1559,7 +1718,7 @@ fs_visitor::run()
       if (0) {
         /* Debug of register spilling: Go spill everything. */
         int virtual_grf_count = virtual_grf_next;
-        for (int i = 1; i < virtual_grf_count; i++) {
+        for (int i = 0; i < virtual_grf_count; i++) {
            spill_reg(i);
         }
       }
@@ -1621,7 +1780,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    fs_visitor v(c, prog, shader);
    if (!v.run()) {
       prog->LinkStatus = GL_FALSE;
-      prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
 
       return false;
    }
@@ -1629,7 +1788,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
       c->dispatch_width = 16;
       fs_visitor v2(c, prog, shader);
-      v2.import_uniforms(v.variable_ht);
+      v2.import_uniforms(&v);
       v2.run();
    }
 
@@ -1663,17 +1822,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 
    key.vp_outputs_written |= BITFIELD64_BIT(FRAG_ATTRIB_WPOS);
    for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
-      int vp_index = -1;
-
       if (!(fp->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;
 
       key.proj_attrib_mask |= 1 << i;
 
-      if (i <= FRAG_ATTRIB_TEX7)
-        vp_index = i;
-      else if (i >= FRAG_ATTRIB_VAR0)
-        vp_index = i - FRAG_ATTRIB_VAR0 + VERT_RESULT_VAR0;
+      int vp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i);
 
       if (vp_index >= 0)
         key.vp_outputs_written |= BITFIELD64_BIT(vp_index);