Merge branch 'glsl-to-tgsi'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 31f76f8c939198d766e99ae2b031abdbab8ce0c6..02041b3bc0394886ac46557e83e1b078816f4de0 100644 (file)
@@ -463,9 +463,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
         } else {
            /* Perspective interpolation case. */
            for (unsigned int k = 0; k < type->vector_elements; k++) {
-              struct brw_reg interp = interp_reg(location, k);
-              emit(FS_OPCODE_LINTERP, attr,
-                   this->delta_x, this->delta_y, fs_reg(interp));
+              /* FINISHME: At some point we probably want to push
+               * this farther by giving similar treatment to the
+               * other potentially constant components of the
+               * attribute, as well as making brw_vs_constval.c
+               * handle varyings other than gl_TexCoord.
+               */
+              if (location >= FRAG_ATTRIB_TEX0 &&
+                  location <= FRAG_ATTRIB_TEX7 &&
+                  k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+                 emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+              } else {
+                 struct brw_reg interp = interp_reg(location, k);
+                 emit(FS_OPCODE_LINTERP, attr,
+                      this->delta_x, this->delta_y, fs_reg(interp));
+              }
               attr.reg_offset++;
            }
 
@@ -621,8 +633,8 @@ fs_visitor::assign_curb_setup()
    }
 
    /* Map the offsets in the UNIFORM file to fixed HW regs. */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
@@ -684,8 +696,8 @@ fs_visitor::assign_urb_setup()
    /* Offset all the urb_setup[] index by the actual position of the
     * setup regs, now that the location of the constants has been chosen.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == FS_OPCODE_LINTERP) {
         assert(inst->src[2].file == FIXED_HW_REG);
@@ -739,8 +751,8 @@ fs_visitor::split_virtual_grfs()
       split_grf[this->delta_x.reg] = false;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Texturing produces 4 contiguous registers, so no splitting. */
       if (inst->is_tex()) {
@@ -763,8 +775,8 @@ fs_visitor::split_virtual_grfs()
       }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF &&
          split_grf[inst->dst.reg] &&
@@ -815,8 +827,8 @@ fs_visitor::setup_pull_constants()
    int pull_uniform_base = max_uniform_components;
    int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != UNIFORM)
@@ -871,8 +883,8 @@ fs_visitor::calculate_live_intervals()
    }
 
    int ip = 0;
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == BRW_OPCODE_DO) {
         if (loop_depth++ == 0)
@@ -945,8 +957,8 @@ fs_visitor::propagate_constants()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
@@ -965,11 +977,9 @@ fs_visitor::propagate_constants()
       /* Found a move of a constant to a GRF.  Find anything else using the GRF
        * before it's written, and replace it with the constant if we can.
        */
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-        fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+          !scan_inst->is_tail_sentinel();
+          scan_inst = (fs_inst *)scan_inst->next) {
         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ELSE ||
@@ -1046,6 +1056,21 @@ fs_visitor::propagate_constants()
                  progress = true;
               }
               break;
+
+           case FS_OPCODE_RCP:
+              /* The hardware doesn't do math on immediate values
+               * (because why are you doing that, seriously?), but
+               * the correct answer is to just constant fold it
+               * anyway.
+               */
+              assert(i == 0);
+              if (inst->src[0].imm.f != 0.0f) {
+                 scan_inst->opcode = BRW_OPCODE_MOV;
+                 scan_inst->src[0] = inst->src[0];
+                 scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
+                 progress = true;
+              }
+              break;
            }
         }
 
@@ -1063,6 +1088,47 @@ fs_visitor::propagate_constants()
 
    return progress;
 }
+
+
+/**
+ * Applies simple algebraic identities to individual instructions.
+ *
+ * Currently only rewrites a MUL whose second operand is the float
+ * immediate 1.0 into a MOV of the first operand.  Returns true if
+ * any instruction was changed.
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MUL:
+        if (inst->src[1].file != IMM)
+           continue;
+
+        /* a * 1.0 = a, so the multiply becomes a plain move of src[0]. */
+        if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+            inst->src[1].imm.f == 1.0) {
+           inst->opcode = BRW_OPCODE_MOV;
+           inst->src[1] = reg_undef;
+           progress = true;
+           break;
+        }
+
+        break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Must be called after calculate_live_intervales() to remove unused
  * writes to registers -- register allocation will fail otherwise
@@ -1077,8 +1143,8 @@ fs_visitor::dead_code_eliminate()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
         inst->remove();
@@ -1101,8 +1167,8 @@ fs_visitor::register_coalesce()
    int if_depth = 0;
    int loop_depth = 0;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Make sure that we dominate the instructions we're going to
        * scan for interfering with our coalescing, or we won't have
@@ -1130,7 +1196,8 @@ fs_visitor::register_coalesce()
       if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->saturate ||
-         inst->dst.file != GRF || inst->src[0].file != GRF ||
+         inst->dst.file != GRF || (inst->src[0].file != GRF &&
+                                   inst->src[0].file != UNIFORM)||
          inst->dst.type != inst->src[0].type)
         continue;
 
@@ -1141,11 +1208,10 @@ fs_visitor::register_coalesce()
        * program.
        */
       bool interfered = false;
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-        fs_inst *scan_inst = (fs_inst *)scan_iter.get();
 
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+          !scan_inst->is_tail_sentinel();
+          scan_inst = (fs_inst *)scan_inst->next) {
         if (scan_inst->dst.file == GRF) {
            if (scan_inst->dst.reg == inst->dst.reg &&
                (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -1153,7 +1219,8 @@ fs_visitor::register_coalesce()
               interfered = true;
               break;
            }
-           if (scan_inst->dst.reg == inst->src[0].reg &&
+           if (inst->src[0].file == GRF &&
+               scan_inst->dst.reg == inst->src[0].reg &&
                (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
                 scan_inst->is_tex())) {
               interfered = true;
@@ -1161,10 +1228,13 @@ fs_visitor::register_coalesce()
            }
         }
 
-        /* The gen6 MATH instruction can't handle source modifiers, so avoid
-         * coalescing those for now.  We should do something more specific.
+        /* The gen6 MATH instruction can't handle source modifiers or
+         * unusual register regions, so avoid coalescing those for
+         * now.  We should do something more specific.
          */
-        if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+        if (intel->gen >= 6 &&
+            scan_inst->is_math() &&
+            (has_source_modifiers || inst->src[0].file == UNIFORM)) {
            interfered = true;
            break;
         }
@@ -1176,19 +1246,17 @@ fs_visitor::register_coalesce()
       /* Rewrite the later usage to point at the source of the move to
        * be removed.
        */
-      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
-          scan_iter.next()) {
-        fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = inst;
+          !scan_inst->is_tail_sentinel();
+          scan_inst = (fs_inst *)scan_inst->next) {
         for (int i = 0; i < 3; i++) {
            if (scan_inst->src[i].file == GRF &&
                scan_inst->src[i].reg == inst->dst.reg &&
                scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
-              scan_inst->src[i].reg = inst->src[0].reg;
-              scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
-              scan_inst->src[i].abs |= inst->src[0].abs;
-              scan_inst->src[i].negate ^= inst->src[0].negate;
-              scan_inst->src[i].smear = inst->src[0].smear;
+              fs_reg new_src = inst->src[0];
+              new_src.negate ^= scan_inst->src[i].negate;
+              new_src.abs |= scan_inst->src[i].abs;
+              scan_inst->src[i] = new_src;
            }
         }
       }
@@ -1212,8 +1280,8 @@ fs_visitor::compute_to_mrf()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       int ip = next_ip;
       next_ip++;
@@ -1392,8 +1460,8 @@ fs_visitor::remove_duplicate_mrf_writes()
 
    memset(last_mrf_move, 0, sizeof(last_mrf_move));
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       switch (inst->opcode) {
       case BRW_OPCODE_DO:
@@ -1527,12 +1595,14 @@ fs_visitor::run()
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
-      foreach_iter(exec_list_iterator, iter, *shader->ir) {
-        ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &*shader->ir) {
+        ir_instruction *ir = (ir_instruction *)node;
         base_ir = ir;
         this->result = reg_undef;
         ir->accept(this);
       }
+      if (failed)
+        return false;
 
       emit_fb_writes();
 
@@ -1548,6 +1618,7 @@ fs_visitor::run()
         progress = remove_duplicate_mrf_writes() || progress;
 
         progress = propagate_constants() || progress;
+        progress = opt_algebraic() || progress;
         progress = register_coalesce() || progress;
         progress = compute_to_mrf() || progress;
         progress = dead_code_eliminate() || progress;
@@ -1684,6 +1755,9 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
    key.clamp_fragment_color = true;
 
    for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      if (fp->Base.ShadowSamplers & (1 << i))
+        key.compare_funcs[i] = GL_LESS;
+
       /* FINISHME: depth compares might use (0,0,0,W) for example */
       key.tex_swizzles[i] = SWIZZLE_XYZW;
    }
@@ -1697,14 +1771,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 
    key.program_string_id = bfp->id;
 
-   drm_intel_bo *old_prog_bo = brw->wm.prog_bo;
+   uint32_t old_prog_offset = brw->wm.prog_offset;
    struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
-   brw->wm.prog_bo = NULL;
 
    bool success = do_wm_prog(brw, prog, bfp, &key);
 
-   drm_intel_bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = old_prog_bo;
+   brw->wm.prog_offset = old_prog_offset;
    brw->wm.prog_data = old_prog_data;
 
    return success;