i965: Define method to check whether a backend_reg is inside a given range.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 2f264255c0799844332b60c8fd7bb8c90849c5d0..a57f501a37e2b1ed17a62ed37e96ba7e9482ef16 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -28,8 +28,6 @@
   * from the LIR.
   */
  
-extern "C" {
-
  #include <sys/types.h>
  
  #include "util/hash_table.h"
@@ -43,7 +41,6 @@ extern "C" {
  #include "brw_context.h"
  #include "brw_eu.h"
  #include "brw_wm.h"
-}
  #include "brw_fs.h"
  #include "brw_cfg.h"
  #include "brw_dead_control_flow.h"
@@ -126,7 +123,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
     case HW_REG:
     case MRF:
     case ATTR:
-      this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 32;
+      this->regs_written =
+         DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32);
        break;
     case BAD_FILE:
        this->regs_written = 0;
@@ -489,10 +487,7 @@ fs_inst::equals(fs_inst *inst) const
  bool
  fs_inst::overwrites_reg(const fs_reg &reg) const
  {
-   return (reg.file == dst.file &&
-           reg.reg == dst.reg &&
-           reg.reg_offset >= dst.reg_offset  &&
-           reg.reg_offset < dst.reg_offset + regs_written);
+   return reg.in_range(dst, regs_written);
  }
  
  bool
@@ -672,14 +667,21 @@ fs_visitor::type_size(const struct glsl_type *type)
     case GLSL_TYPE_VOID:
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_DOUBLE:
        unreachable("not reached");
     }
  
     return 0;
  }
  
+/**
+ * Create a MOV to read the timestamp register.
+ *
+ * The caller must emit the MOV, which is handed back through out_mov.  The
+ * return value is the destination of the MOV, with extra parameters set.
+ */
  fs_reg
-fs_visitor::get_timestamp()
+fs_visitor::get_timestamp(fs_inst **out_mov)
  {
     assert(brw->gen >= 7);
  
@@ -690,7 +692,7 @@ fs_visitor::get_timestamp()
  
     fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
  
-   fs_inst *mov = emit(MOV(dst, ts));
+   fs_inst *mov = MOV(dst, ts);
     /* We want to read the 3 fields we care about even if it's not enabled in
      * the dispatch.
      */
@@ -708,6 +710,7 @@ fs_visitor::get_timestamp()
      */
     dst.set_smear(0);
  
+   *out_mov = mov;
     return dst;
  }
  
@@ -715,7 +718,9 @@ void
  fs_visitor::emit_shader_time_begin()
  {
     current_annotation = "shader time start";
-   shader_start_time = get_timestamp();
+   fs_inst *mov;
+   shader_start_time = get_timestamp(&mov);
+   emit(mov);
  }
  
  void
@@ -751,38 +756,50 @@ fs_visitor::emit_shader_time_end()
        unreachable("fs_visitor::emit_shader_time_end missing code");
     }
  
-   fs_reg shader_end_time = get_timestamp();
+   /* Insert our code just before the final SEND with EOT. */
+   exec_node *end = this->instructions.get_tail();
+   assert(end && ((fs_inst *) end)->eot);
+
+   fs_inst *tm_read;
+   fs_reg shader_end_time = get_timestamp(&tm_read);
+   end->insert_before(tm_read);
  
     /* Check that there weren't any timestamp reset events (assuming these
      * were the only two timestamp reads that happened).
      */
     fs_reg reset = shader_end_time;
     reset.set_smear(2);
-   fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
+   fs_inst *test = AND(reg_null_d, reset, fs_reg(1u));
     test->conditional_mod = BRW_CONDITIONAL_Z;
-   emit(IF(BRW_PREDICATE_NORMAL));
+   test->force_writemask_all = true;
+   end->insert_before(test);
+   end->insert_before(IF(BRW_PREDICATE_NORMAL));
  
     fs_reg start = shader_start_time;
     start.negate = true;
     fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
-   emit(ADD(diff, start, shader_end_time));
+   diff.set_smear(0);
+   fs_inst *add = ADD(diff, start, shader_end_time);
+   add->force_writemask_all = true;
+   end->insert_before(add);
  
     /* If there were no instructions between the two timestamp gets, the diff
      * is 2 cycles.  Remove that overhead, so I can forget about that when
      * trying to determine the time taken for single instructions.
      */
-   emit(ADD(diff, diff, fs_reg(-2u)));
-
-   emit_shader_time_write(type, diff);
-   emit_shader_time_write(written_type, fs_reg(1u));
-   emit(BRW_OPCODE_ELSE);
-   emit_shader_time_write(reset_type, fs_reg(1u));
-   emit(BRW_OPCODE_ENDIF);
+   add = ADD(diff, diff, fs_reg(-2u));
+   add->force_writemask_all = true;
+   end->insert_before(add);
+
+   end->insert_before(SHADER_TIME_ADD(type, diff));
+   end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u)));
+   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width));
+   end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
+   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width));
  }
  
-void
-fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
-                                   fs_reg value)
+fs_inst *
+fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value)
  {
     int shader_time_index =
        brw_get_shader_time_index(brw, shader_prog, prog, type);
@@ -794,8 +811,8 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
     else
        payload = vgrf(glsl_type::uint_type);
  
-   emit(new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
-                             fs_reg(), payload, offset, value));
+   return new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
+                               fs_reg(), payload, offset, value);
  }
  
  void
@@ -809,11 +826,11 @@ fs_visitor::vfail(const char *format, va_list va)
     failed = true;
  
     msg = ralloc_vasprintf(mem_ctx, format, va);
-   msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg);
+   msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
  
     this->fail_msg = msg;
  
-   if (INTEL_DEBUG & DEBUG_WM) {
+   if (debug_enabled) {
        fprintf(stderr, "%s",  msg);
     }
  }
@@ -1562,6 +1579,17 @@ fs_visitor::emit_sampleid_setup()
     return reg;
  }
  
+void
+fs_visitor::resolve_source_modifiers(fs_reg *src)
+{
+   if (!src->abs && !src->negate)
+      return;
+
+   fs_reg temp = retype(vgrf(1), src->type);
+   emit(MOV(temp, *src));
+   *src = temp;
+}
+
  fs_reg
  fs_visitor::fix_math_operand(fs_reg src)
  {
@@ -1662,6 +1690,21 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
     return inst;
  }
  
+void
+fs_visitor::emit_discard_jump()
+{
+   /* For performance, after a discard, jump to the end of the
+    * shader if all relevant channels have been discarded.
+    */
+   fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
+   discard_jump->flag_subreg = 1;
+
+   discard_jump->predicate = (dispatch_width == 8)
+                             ? BRW_PREDICATE_ALIGN1_ANY8H
+                             : BRW_PREDICATE_ALIGN1_ANY16H;
+   discard_jump->predicate_inverse = true;
+}
+
  void
  fs_visitor::assign_curb_setup()
  {
@@ -2250,8 +2293,13 @@ fs_visitor::demote_pull_constants()
          if (inst->src[i].file != UNIFORM)
             continue;
  
-         int pull_index = pull_constant_loc[inst->src[i].reg +
-                                            inst->src[i].reg_offset];
+         int pull_index;
+         unsigned location = inst->src[i].reg + inst->src[i].reg_offset;
+         if (location >= uniforms) /* Out of bounds access */
+            pull_index = -1;
+         else
+            pull_index = pull_constant_loc[location];
+
           if (pull_index == -1)
             continue;
  
@@ -2324,6 +2372,15 @@ fs_visitor::opt_algebraic()
             break;
          }
  
+         /* a * -1.0 = -a */
+         if (inst->src[1].is_negative_one()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0].negate = !inst->src[0].negate;
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
           /* a * 0.0 = 0.0 */
           if (inst->src[1].is_zero()) {
              inst->opcode = BRW_OPCODE_MOV;
@@ -2333,6 +2390,14 @@ fs_visitor::opt_algebraic()
              break;
           }
  
+         if (inst->src[0].file == IMM) {
+            assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0].fixed_hw_reg.dw1.f *= inst->src[1].fixed_hw_reg.dw1.f;
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
          break;
        case BRW_OPCODE_ADD:
           if (inst->src[1].file != IMM)
@@ -2345,6 +2410,15 @@ fs_visitor::opt_algebraic()
              progress = true;
              break;
           }
+
+         if (inst->src[0].file == IMM) {
+            assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0].fixed_hw_reg.dw1.f += inst->src[1].fixed_hw_reg.dw1.f;
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
           break;
        case BRW_OPCODE_OR:
           if (inst->src[0].equals(inst->src[1])) {
@@ -2392,6 +2466,7 @@ fs_visitor::opt_algebraic()
                    if (inst->src[1].fixed_hw_reg.dw1.f >= 1.0f) {
                       inst->opcode = BRW_OPCODE_MOV;
                       inst->src[1] = reg_undef;
+                     inst->conditional_mod = BRW_CONDITIONAL_NONE;
                       progress = true;
                    }
                    break;
@@ -2418,6 +2493,33 @@ fs_visitor::opt_algebraic()
              }
           }
           break;
+      case BRW_OPCODE_MAD:
+         if (inst->src[1].is_zero() || inst->src[2].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = reg_undef;
+            inst->src[2] = reg_undef;
+            progress = true;
+         } else if (inst->src[0].is_zero()) {
+            inst->opcode = BRW_OPCODE_MUL;
+            inst->src[0] = inst->src[2];
+            inst->src[2] = reg_undef;
+            progress = true;
+         } else if (inst->src[1].is_one()) {
+            inst->opcode = BRW_OPCODE_ADD;
+            inst->src[1] = inst->src[2];
+            inst->src[2] = reg_undef;
+            progress = true;
+         } else if (inst->src[2].is_one()) {
+            inst->opcode = BRW_OPCODE_ADD;
+            inst->src[2] = reg_undef;
+            progress = true;
+         } else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
+            inst->opcode = BRW_OPCODE_ADD;
+            inst->src[1].fixed_hw_reg.dw1.f *= inst->src[2].fixed_hw_reg.dw1.f;
+            inst->src[2] = reg_undef;
+            progress = true;
+         }
+         break;
        case SHADER_OPCODE_RCP: {
           fs_inst *prev = (fs_inst *)inst->prev;
           if (prev->opcode == SHADER_OPCODE_SQRT) {
@@ -2432,8 +2534,16 @@ fs_visitor::opt_algebraic()
        default:
          break;
        }
-   }
  
+      /* Swap if src[0] is immediate. */
+      if (progress && inst->is_commutative()) {
+         if (inst->src[0].file == IMM) {
+            fs_reg tmp = inst->src[1];
+            inst->src[1] = inst->src[0];
+            inst->src[0] = tmp;
+         }
+      }
+   }
     return progress;
  }
  
@@ -2503,6 +2613,47 @@ fs_visitor::opt_register_renaming()
     return progress;
  }
  
+/**
+ * Remove redundant or useless discard jumps.
+ *
+ * For example, we can eliminate jumps in the following sequence:
+ *
+ * discard-jump       (redundant with the next jump)
+ * discard-jump       (useless; jumps to the next instruction)
+ * placeholder-halt
+ */
+bool
+fs_visitor::opt_redundant_discard_jumps()
+{
+   bool progress = false;
+
+   bblock_t *last_bblock = cfg->blocks[cfg->num_blocks - 1];
+
+   fs_inst *placeholder_halt = NULL;
+   foreach_inst_in_block_reverse(fs_inst, inst, last_bblock) {
+      if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT) {
+         placeholder_halt = inst;
+         break;
+      }
+   }
+
+   if (!placeholder_halt)
+      return false;
+
+   /* Delete any discard jumps immediately before the placeholder halt. */
+   for (fs_inst *prev = (fs_inst *) placeholder_halt->prev;
+        !prev->is_head_sentinel() && prev->opcode == FS_OPCODE_DISCARD_JUMP;
+        prev = (fs_inst *) placeholder_halt->prev) {
+      prev->remove(last_bblock);
+      progress = true;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
  bool
  fs_visitor::compute_to_mrf()
  {
@@ -2756,7 +2907,7 @@ fs_visitor::remove_duplicate_mrf_writes()
  
        /* Clear out any MRF move records whose sources got overwritten. */
        if (inst->dst.file == GRF) {
-        for (unsigned int i = 0; i < Elements(last_mrf_move); i++) {
+        for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
             if (last_mrf_move[i] &&
                 last_mrf_move[i]->src[0].reg == inst->dst.reg) {
                last_mrf_move[i] = NULL;
@@ -2779,8 +2930,7 @@ fs_visitor::remove_duplicate_mrf_writes()
  }
  
  static void
-clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
-                        int first_grf, int grf_len)
+clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
  {
     /* Clear the flag for registers that actually got read (as expected). */
     for (int i = 0; i < inst->sources; i++) {
@@ -2831,8 +2981,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
     memset(needs_dep, false, sizeof(needs_dep));
     memset(needs_dep, true, write_len);
  
-   clear_deps_for_inst_src(inst, dispatch_width,
-                           needs_dep, first_write_grf, write_len);
+   clear_deps_for_inst_src(inst, needs_dep, first_write_grf, write_len);
  
     /* Walk backwards looking for writes to registers we're writing which
      * aren't read since being written.  If we hit the start of the program,
@@ -2872,8 +3021,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
        }
  
        /* Clear the flag for registers that actually got read (as expected). */
-      clear_deps_for_inst_src(scan_inst, dispatch_width,
-                              needs_dep, first_write_grf, write_len);
+      clear_deps_for_inst_src(scan_inst, needs_dep, first_write_grf, write_len);
  
        /* Continue the loop only if we haven't resolved all the dependencies */
        int i;
@@ -2918,8 +3066,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
        }
  
        /* Clear the flag for registers that actually got read (as expected). */
-      clear_deps_for_inst_src(scan_inst, dispatch_width,
-                              needs_dep, first_write_grf, write_len);
+      clear_deps_for_inst_src(scan_inst, needs_dep, first_write_grf, write_len);
  
        /* We insert our reads as late as possible since they're reading the
         * result of a SEND, which has massive latency.
@@ -2941,16 +3088,6 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
        if (i == write_len)
           return;
     }
-
-   /* If we hit the end of the program, resolve all remaining dependencies out
-    * of paranoia.
-    */
-   fs_inst *last_inst = (fs_inst *)this->instructions.get_tail();
-   assert(last_inst->eot);
-   for (int i = 0; i < write_len; i++) {
-      if (needs_dep[i])
-         last_inst->insert_before(block, DEP_RESOLVE_MOV(first_write_grf + i));
-   }
  }
  
  void
@@ -3008,7 +3145,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
           assert(const_offset_reg.file == IMM &&
                  const_offset_reg.type == BRW_REGISTER_TYPE_UD);
           const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
-         fs_reg payload = vgrf(glsl_type::uint_type);
+         fs_reg payload = fs_reg(GRF, alloc.allocate(1));
  
           /* We have to use a message header on Skylake to get SIMD4x2 mode.
            * Reserve space for the register.
@@ -3057,7 +3194,7 @@ fs_visitor::lower_load_payload()
     bool progress = false;
  
     int vgrf_to_reg[alloc.count];
-   int reg_count = 16; /* Leave room for MRF */
+   int reg_count = 0;
     for (unsigned i = 0; i < alloc.count; ++i) {
        vgrf_to_reg[i] = reg_count;
        reg_count += alloc.sizes[i];
@@ -3071,18 +3208,13 @@ fs_visitor::lower_load_payload()
     memset(metadata, 0, sizeof(metadata));
  
     foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
-      int dst_reg;
        if (inst->dst.file == GRF) {
-         dst_reg = vgrf_to_reg[inst->dst.reg];
-      } else {
-         /* MRF */
-         dst_reg = inst->dst.reg;
-      }
-
-      if (inst->dst.file == MRF || inst->dst.file == GRF) {
-         bool force_sechalf = inst->force_sechalf;
+         const int dst_reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+         bool force_sechalf = inst->force_sechalf &&
+                              !inst->force_writemask_all;
           bool toggle_sechalf = inst->dst.width == 16 &&
-                               type_sz(inst->dst.type) == 4;
+                               type_sz(inst->dst.type) == 4 &&
+                               !inst->force_writemask_all;
           for (int i = 0; i < inst->regs_written; ++i) {
              metadata[dst_reg + i].written = true;
              metadata[dst_reg + i].force_sechalf = force_sechalf;
@@ -3123,22 +3255,28 @@ fs_visitor::lower_load_payload()
                                  inst->src[i].reg_offset;
                    mov->force_sechalf = metadata[src_reg].force_sechalf;
                    mov->force_writemask_all = metadata[src_reg].force_writemask_all;
-                  metadata[dst_reg] = metadata[src_reg];
-                  if (dst.width * type_sz(dst.type) > 32) {
-                     assert((!metadata[src_reg].written ||
-                             !metadata[src_reg].force_sechalf) &&
-                            (!metadata[src_reg + 1].written ||
-                             metadata[src_reg + 1].force_sechalf));
-                     metadata[dst_reg + 1] = metadata[src_reg + 1];
-                  }
                 } else {
-                  metadata[dst_reg].force_writemask_all = false;
-                  metadata[dst_reg].force_sechalf = false;
-                  if (dst.width == 16) {
-                     metadata[dst_reg + 1].force_writemask_all = false;
-                     metadata[dst_reg + 1].force_sechalf = true;
+                  /* We don't have any useful metadata for immediates or
+                   * uniforms.  Assume that any of the channels of the
+                   * destination may be used.
+                   */
+                  assert(inst->src[i].file == IMM ||
+                         inst->src[i].file == UNIFORM);
+                  mov->force_writemask_all = true;
+               }
+
+               if (dst.file == GRF) {
+                  const int dst_reg = vgrf_to_reg[dst.reg] + dst.reg_offset;
+                  const bool force_writemask = mov->force_writemask_all;
+                  metadata[dst_reg].force_writemask_all = force_writemask;
+                  metadata[dst_reg].force_sechalf = mov->force_sechalf;
+                  if (dst.width * type_sz(dst.type) > 32) {
+                     assert(!mov->force_sechalf);
+                     metadata[dst_reg + 1].force_writemask_all = force_writemask;
+                     metadata[dst_reg + 1].force_sechalf = !force_writemask;
                    }
                 }
+
                 inst->insert_before(block, mov);
              }
  
@@ -3165,7 +3303,6 @@ fs_visitor::dump_instructions()
  void
  fs_visitor::dump_instructions(const char *name)
  {
-   calculate_register_pressure();
     FILE *file = stderr;
     if (name && geteuid() != 0) {
        file = fopen(name, "w");
@@ -3173,14 +3310,23 @@ fs_visitor::dump_instructions(const char *name)
           file = stderr;
     }
  
-   int ip = 0, max_pressure = 0;
-   foreach_block_and_inst(block, backend_instruction, inst, cfg) {
-      max_pressure = MAX2(max_pressure, regs_live_at_ip[ip]);
-      fprintf(file, "{%3d} %4d: ", regs_live_at_ip[ip], ip);
-      dump_instruction(inst, file);
-      ++ip;
+   if (cfg) {
+      calculate_register_pressure();
+      int ip = 0, max_pressure = 0;
+      foreach_block_and_inst(block, backend_instruction, inst, cfg) {
+         max_pressure = MAX2(max_pressure, regs_live_at_ip[ip]);
+         fprintf(file, "{%3d} %4d: ", regs_live_at_ip[ip], ip);
+         dump_instruction(inst, file);
+         ip++;
+      }
+      fprintf(file, "Maximum %3d registers live at once.\n", max_pressure);
+   } else {
+      int ip = 0;
+      foreach_in_list(backend_instruction, inst, &instructions) {
+         fprintf(file, "%4d: ", ip++);
+         dump_instruction(inst, file);
+      }
     }
-   fprintf(file, "Maximum %3d registers live at once.\n", max_pressure);
  
     if (file != stderr) {
        fclose(file);
@@ -3312,9 +3458,11 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
           case BRW_REGISTER_TYPE_F:
              fprintf(file, "%ff", inst->src[i].fixed_hw_reg.dw1.f);
              break;
+         case BRW_REGISTER_TYPE_W:
           case BRW_REGISTER_TYPE_D:
              fprintf(file, "%dd", inst->src[i].fixed_hw_reg.dw1.d);
              break;
+         case BRW_REGISTER_TYPE_UW:
           case BRW_REGISTER_TYPE_UD:
              fprintf(file, "%uu", inst->src[i].fixed_hw_reg.dw1.ud);
              break;
@@ -3546,8 +3694,6 @@ fs_visitor::optimize()
  {
     const char *stage_name = stage == MESA_SHADER_VERTEX ? "vs" : "fs";
  
-   calculate_cfg();
-
     split_virtual_grfs();
  
     move_uniform_array_access_to_pull_constants();
@@ -3597,6 +3743,7 @@ fs_visitor::optimize()
        OPT(opt_peephole_sel);
        OPT(dead_control_flow_eliminate, this);
        OPT(opt_register_renaming);
+      OPT(opt_redundant_discard_jumps);
        OPT(opt_saturate_propagation);
        OPT(register_coalesce);
        OPT(compute_to_mrf);
@@ -3613,6 +3760,8 @@ fs_visitor::optimize()
        OPT(dead_code_eliminate);
     }
  
+   OPT(opt_combine_constants);
+
     lower_uniform_pull_constant_loads();
  }
  
@@ -3701,6 +3850,26 @@ fs_visitor::allocate_registers()
        prog_data->total_scratch = brw_get_scratch_size(last_scratch);
  }
  
+static bool
+env_var_as_boolean(const char *var_name, bool default_value)
+{
+   const char *str = getenv(var_name);
+   if (str == NULL)
+      return default_value;
+
+   if (strcmp(str, "1") == 0 ||
+       strcasecmp(str, "true") == 0 ||
+       strcasecmp(str, "yes") == 0) {
+      return true;
+   } else if (strcmp(str, "0") == 0 ||
+              strcasecmp(str, "false") == 0 ||
+              strcasecmp(str, "no") == 0) {
+      return false;
+   } else {
+      return default_value;
+   }
+}
+
  bool
  fs_visitor::run_vs()
  {
@@ -3712,17 +3881,24 @@ fs_visitor::run_vs()
     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
        emit_shader_time_begin();
  
-   foreach_in_list(ir_instruction, ir, shader->base.ir) {
-      base_ir = ir;
-      this->result = reg_undef;
-      ir->accept(this);
+   if (env_var_as_boolean("INTEL_USE_NIR", false)) {
+      emit_nir_code();
+   } else {
+      foreach_in_list(ir_instruction, ir, shader->base.ir) {
+         base_ir = ir;
+         this->result = reg_undef;
+         ir->accept(this);
+      }
+      base_ir = NULL;
     }
-   base_ir = NULL;
+
     if (failed)
        return false;
  
     emit_urb_writes();
  
+   calculate_cfg();
+
     optimize();
  
     assign_curb_setup();
@@ -3779,7 +3955,7 @@ fs_visitor::run_fs()
         * functions called "main").
         */
        if (shader) {
-         if (getenv("INTEL_USE_NIR") != NULL) {
+         if (env_var_as_boolean("INTEL_USE_NIR", false)) {
              emit_nir_code();
           } else {
              foreach_in_list(ir_instruction, ir, shader->base.ir) {
@@ -3802,6 +3978,11 @@ fs_visitor::run_fs()
  
        emit_fb_writes();
  
+      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+         emit_shader_time_end();
+
+      calculate_cfg();
+
        optimize();
  
        assign_curb_setup();
@@ -3899,7 +4080,7 @@ brw_wm_fs_emit(struct brw_context *brw,
     }
  
     fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base,
-                  &fp->Base, v.runtime_check_aads_emit, "FS");
+                  &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
  
     if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
        char *name;