Merge remote-tracking branch 'mesa-public/master' into vulkan

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 0647d8f8f975e51b5abe3be53288c98deea8de23..2c0ff961182cea05f8998dfeeeeab99874db5fa5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -214,143 +214,6 @@ fs_inst::resize_sources(uint8_t num_sources)
     }
  }
  
-#define ALU1(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0)                \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0);          \
-   }
-
-#define ALU2(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1)                                   \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);    \
-   }
-
-#define ALU2_ACC(op)                                                    \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1)                                   \
-   {                                                                    \
-      fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
-      inst->writes_accumulator = true;                                  \
-      return inst;                                                      \
-   }
-
-#define ALU3(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1, const fs_reg &src2)               \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
-   }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(SEL)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-fs_inst *
-fs_visitor::IF(enum brw_predicate predicate)
-{
-   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
-   inst->predicate = predicate;
-   return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-fs_inst *
-fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
-               enum brw_conditional_mod condition)
-{
-   assert(devinfo->gen == 6);
-   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
-                                        reg_null_d, src0, src1);
-   inst->conditional_mod = condition;
-   return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-fs_inst *
-fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1,
-                enum brw_conditional_mod condition)
-{
-   fs_inst *inst;
-
-   /* Take the instruction:
-    *
-    * CMP null<d> src0<f> src1<f>
-    *
-    * Original gen4 does type conversion to the destination type before
-    * comparison, producing garbage results for floating point comparisons.
-    *
-    * The destination type doesn't matter on newer generations, so we set the
-    * type to match src0 so we can compact the instruction.
-    */
-   dst.type = src0.type;
-   if (dst.file == HW_REG)
-      dst.fixed_hw_reg.type = dst.type;
-
-   resolve_ud_negate(&src0);
-   resolve_ud_negate(&src1);
-
-   inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
-   inst->conditional_mod = condition;
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources,
-                         int header_size)
-{
-   assert(dst.width % 8 == 0);
-   fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst.width,
-                                        dst, src, sources);
-   inst->header_size = header_size;
-
-   for (int i = 0; i < header_size; i++)
-      assert(src[i].file != GRF || src[i].width * type_sz(src[i].type) == 32);
-   inst->regs_written = header_size;
-
-   for (int i = header_size; i < sources; ++i)
-      assert(src[i].file != GRF || src[i].width == dst.width);
-   inst->regs_written += (sources - header_size) * (dst.width / 8);
-
-   return inst;
-}
-
  void
  fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
                                         const fs_reg &dst,
@@ -662,6 +525,7 @@ fs_visitor::type_size(const struct glsl_type *type)
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@ -715,39 +579,6 @@ fs_visitor::emit_shader_time_begin()
  void
  fs_visitor::emit_shader_time_end()
  {
-   enum shader_time_shader_type type, written_type, reset_type;
-   switch (stage) {
-   case MESA_SHADER_VERTEX:
-      type = ST_VS;
-      written_type = ST_VS_WRITTEN;
-      reset_type = ST_VS_RESET;
-      break;
-   case MESA_SHADER_GEOMETRY:
-      type = ST_GS;
-      written_type = ST_GS_WRITTEN;
-      reset_type = ST_GS_RESET;
-      break;
-   case MESA_SHADER_FRAGMENT:
-      if (dispatch_width == 8) {
-         type = ST_FS8;
-         written_type = ST_FS8_WRITTEN;
-         reset_type = ST_FS8_RESET;
-      } else {
-         assert(dispatch_width == 16);
-         type = ST_FS16;
-         written_type = ST_FS16_WRITTEN;
-         reset_type = ST_FS16_RESET;
-      }
-      break;
-   case MESA_SHADER_COMPUTE:
-      type = ST_CS;
-      written_type = ST_CS_WRITTEN;
-      reset_type = ST_CS_RESET;
-      break;
-   default:
-      unreachable("fs_visitor::emit_shader_time_end missing code");
-   }
-
     /* Insert our code just before the final SEND with EOT. */
     exec_node *end = this->instructions.get_tail();
     assert(end && ((fs_inst *) end)->eot);
@@ -776,20 +607,20 @@ fs_visitor::emit_shader_time_end()
      * trying to determine the time taken for single instructions.
      */
     ibld.ADD(diff, diff, fs_reg(-2u));
-   SHADER_TIME_ADD(ibld, type, diff);
-   SHADER_TIME_ADD(ibld, written_type, fs_reg(1u));
+   SHADER_TIME_ADD(ibld, 0, diff);
+   SHADER_TIME_ADD(ibld, 1, fs_reg(1u));
     ibld.emit(BRW_OPCODE_ELSE);
-   SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u));
+   SHADER_TIME_ADD(ibld, 2, fs_reg(1u));
     ibld.emit(BRW_OPCODE_ENDIF);
  }
  
  void
  fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
-                            enum shader_time_shader_type type, fs_reg value)
+                            int shader_time_subindex,
+                            fs_reg value)
  {
-   int shader_time_index =
-      brw_get_shader_time_index(brw, shader_prog, prog, type);
-   fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
+   int index = shader_time_index * 3 + shader_time_subindex;
+   fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
  
     fs_reg payload;
     if (dispatch_width == 8)
@@ -840,65 +671,16 @@ fs_visitor::fail(const char *format, ...)
   * During a SIMD16 compile (if one happens anyway), this just calls fail().
   */
  void
-fs_visitor::no16(const char *format, ...)
+fs_visitor::no16(const char *msg)
  {
-   va_list va;
-
-   va_start(va, format);
-
     if (dispatch_width == 16) {
-      vfail(format, va);
+      fail("%s", msg);
     } else {
        simd16_unsupported = true;
  
-      if (brw->perf_debug) {
-         if (no16_msg)
-            ralloc_vasprintf_append(&no16_msg, format, va);
-         else
-            no16_msg = ralloc_vasprintf(mem_ctx, format, va);
-      }
+      compiler->shader_perf_log(log_data,
+                                "SIMD16 shader failed to compile: %s", msg);
     }
-
-   va_end(va);
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1, const fs_reg &src2)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst,
-                 fs_reg src[], int sources)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources));
  }
  
  /**
@@ -1053,14 +835,6 @@ fs_visitor::vgrf(const glsl_type *const type)
                   brw_type_for_base_type(type), dispatch_width);
  }
  
-fs_reg
-fs_visitor::vgrf(int num_components)
-{
-   int reg_width = dispatch_width / 8;
-   return fs_reg(GRF, alloc.allocate(num_components * reg_width),
-                 BRW_REGISTER_TYPE_F, dispatch_width);
-}
-
  /** Fixed HW reg constructor. */
  fs_reg::fs_reg(enum register_file file, int reg)
  {
@@ -1487,106 +1261,6 @@ fs_visitor::resolve_source_modifiers(fs_reg *src)
     *src = temp;
  }
  
-fs_reg
-fs_visitor::fix_math_operand(fs_reg src)
-{
-   /* Can't do hstride == 0 args on gen6 math, so expand it out. We
-    * might be able to do better by doing execsize = 1 math and then
-    * expanding that result out, but we would need to be careful with
-    * masking.
-    *
-    * The hardware ignores source modifiers (negate and abs) on math
-    * instructions, so we also move to a temp to set those up.
-    */
-   if (devinfo->gen == 6 && src.file != UNIFORM && src.file != IMM &&
-       !src.abs && !src.negate)
-      return src;
-
-   /* Gen7 relaxes most of the above restrictions, but still can't use IMM
-    * operands to math
-    */
-   if (devinfo->gen >= 7 && src.file != IMM)
-      return src;
-
-   fs_reg expanded = vgrf(glsl_type::float_type);
-   expanded.type = src.type;
-   emit(BRW_OPCODE_MOV, expanded, src);
-   return expanded;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
-{
-   switch (opcode) {
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-      break;
-   default:
-      unreachable("not reached: bad math opcode");
-   }
-
-   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
-    * might be able to do better by doing execsize = 1 math and then
-    * expanding that result out, but we would need to be careful with
-    * masking.
-    *
-    * Gen 6 hardware ignores source modifiers (negate and abs) on math
-    * instructions, so we also move to a temp to set those up.
-    */
-   if (devinfo->gen == 6 || devinfo->gen == 7)
-      src = fix_math_operand(src);
-
-   fs_inst *inst = emit(opcode, dst, src);
-
-   if (devinfo->gen < 6) {
-      inst->base_mrf = 2;
-      inst->mlen = dispatch_width / 8;
-   }
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
-{
-   int base_mrf = 2;
-   fs_inst *inst;
-
-   if (devinfo->gen >= 8) {
-      inst = emit(opcode, dst, src0, src1);
-   } else if (devinfo->gen >= 6) {
-      src0 = fix_math_operand(src0);
-      src1 = fix_math_operand(src1);
-
-      inst = emit(opcode, dst, src0, src1);
-   } else {
-      /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
-       * "Message Payload":
-       *
-       * "Operand0[7].  For the INT DIV functions, this operand is the
-       *  denominator."
-       *  ...
-       * "Operand1[7].  For the INT DIV functions, this operand is the
-       *  numerator."
-       */
-      bool is_int_div = opcode != SHADER_OPCODE_POW;
-      fs_reg &op0 = is_int_div ? src1 : src0;
-      fs_reg &op1 = is_int_div ? src0 : src1;
-
-      emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1));
-      inst = emit(opcode, dst, op0, reg_null_f);
-
-      inst->base_mrf = base_mrf;
-      inst->mlen = 2 * dispatch_width / 8;
-   }
-   return inst;
-}
-
  void
  fs_visitor::emit_discard_jump()
  {
@@ -1787,6 +1461,10 @@ fs_visitor::assign_vs_urb_setup()
     unsigned vue_entries =
        MAX2(count, vs_prog_data->base.vue_map.num_slots);
  
+   /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
+    * command).  Each attribute is 16 bytes (4 floats/dwords), so each unit
+    * fits four attributes.
+    */
     vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
     vs_prog_data->base.urb_read_length = (count + 1) / 2;
  
@@ -2917,9 +2595,22 @@ fs_visitor::emit_repclear_shader()
     brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
     int base_mrf = 1;
     int color_mrf = base_mrf + 2;
+   fs_inst *mov;
  
-   fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
-                                     fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+   if (uniforms == 1) {
+      mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+                               fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+   } else {
+      struct brw_reg reg =
+         brw_reg(BRW_GENERAL_REGISTER_FILE,
+                 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
+                 BRW_VERTICAL_STRIDE_8,
+                 BRW_WIDTH_2,
+                 BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+
+      mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+                               fs_reg(reg));
+   }
  
     fs_inst *write;
     if (key->nr_color_regions == 1) {
@@ -2948,8 +2639,10 @@ fs_visitor::emit_repclear_shader()
     assign_curb_setup();
  
     /* Now that we have the uniform assigned, go ahead and force it to a vec4. */
-   assert(mov->src[0].file == HW_REG);
-   mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+   if (uniforms == 1) {
+      assert(mov->src[0].file == HW_REG);
+      mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+   }
  }
  
  /**
@@ -3471,7 +3164,7 @@ fs_visitor::lower_integer_multiplication()
           fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
                       inst->dst.type, dispatch_width);
  
-         if (brw->gen >= 7) {
+         if (devinfo->gen >= 7) {
              fs_reg src1_0_w = inst->src[1];
              fs_reg src1_1_w = inst->src[1];
  
@@ -3480,10 +3173,16 @@ fs_visitor::lower_integer_multiplication()
                 src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
              } else {
                 src1_0_w.type = BRW_REGISTER_TYPE_UW;
-               src1_0_w.stride = 2;
+               if (src1_0_w.stride != 0) {
+                  assert(src1_0_w.stride == 1);
+                  src1_0_w.stride = 2;
+               }
  
                 src1_1_w.type = BRW_REGISTER_TYPE_UW;
-               src1_1_w.stride = 2;
+               if (src1_1_w.stride != 0) {
+                  assert(src1_1_w.stride == 1);
+                  src1_1_w.stride = 2;
+               }
                 src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
              }
              ibld.MUL(low, inst->src[0], src1_0_w);
@@ -3493,10 +3192,16 @@ fs_visitor::lower_integer_multiplication()
              fs_reg src0_1_w = inst->src[0];
  
              src0_0_w.type = BRW_REGISTER_TYPE_UW;
-            src0_0_w.stride = 2;
+            if (src0_0_w.stride != 0) {
+               assert(src0_0_w.stride == 1);
+               src0_0_w.stride = 2;
+            }
  
              src0_1_w.type = BRW_REGISTER_TYPE_UW;
-            src0_1_w.stride = 2;
+            if (src0_1_w.stride != 0) {
+               assert(src0_1_w.stride == 1);
+               src0_1_w.stride = 2;
+            }
              src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
  
              ibld.MUL(low, src0_0_w, inst->src[1]);
@@ -3518,7 +3223,7 @@ fs_visitor::lower_integer_multiplication()
           ibld.ADD(dst, low, high);
  
           if (inst->conditional_mod) {
-            fs_reg null(retype(brw_null_reg(), inst->dst.type));
+            fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
              set_condmod(inst->conditional_mod,
                          ibld.MOV(null, inst->dst));
           }
@@ -3900,7 +3605,7 @@ fs_visitor::setup_vs_payload()
  void
  fs_visitor::setup_cs_payload()
  {
-   assert(brw->gen >= 7);
+   assert(devinfo->gen >= 7);
  
     payload.num_regs = 1;
  }
@@ -4083,9 +3788,11 @@ fs_visitor::allocate_registers()
           fail("Failure to register allocate.  Reduce number of "
                "live scalar values to avoid this.");
        } else {
-         perf_debug("%s shader triggered register spilling.  "
-                    "Try reducing the number of live scalar values to "
-                    "improve performance.\n", stage_name);
+         compiler->shader_perf_log(log_data,
+                                   "%s shader triggered register spilling.  "
+                                   "Try reducing the number of live scalar "
+                                   "values to improve performance.\n",
+                                   stage_name);
        }
  
        /* Since we're out of heuristics, just go spill registers until we
@@ -4114,14 +3821,15 @@ fs_visitor::allocate_registers()
  }
  
  bool
-fs_visitor::run_vs()
+fs_visitor::run_vs(gl_clip_plane *clip_planes)
  {
     assert(stage == MESA_SHADER_VERTEX);
  
-   assign_common_binding_table_offsets(0);
+   if (prog_data->map_entries == NULL)
+      assign_common_binding_table_offsets(0);
     setup_vs_payload();
  
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
        emit_shader_time_begin();
  
     emit_nir_code();
@@ -4129,9 +3837,9 @@ fs_visitor::run_vs()
     if (failed)
        return false;
  
-   emit_urb_writes();
+   emit_urb_writes(clip_planes);
  
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
        emit_shader_time_end();
  
     calculate_cfg();
@@ -4148,7 +3856,7 @@ fs_visitor::run_vs()
  }
  
  bool
-fs_visitor::run_fs()
+fs_visitor::run_fs(bool do_rep_send)
  {
     brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
     brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
@@ -4157,7 +3865,8 @@ fs_visitor::run_fs()
  
     sanity_param_count = prog->Parameters->NumParameters;
  
-   assign_binding_table_offsets();
+   if (prog_data->map_entries == NULL)
+      assign_binding_table_offsets();
  
     if (devinfo->gen >= 6)
        setup_payload_gen6();
@@ -4166,10 +3875,11 @@ fs_visitor::run_fs()
  
     if (0) {
        emit_dummy_fs();
-   } else if (brw->use_rep_send && dispatch_width == 16) {
+   } else if (do_rep_send) {
+      assert(dispatch_width == 16);
        emit_repclear_shader();
     } else {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      if (shader_time_index >= 0)
           emit_shader_time_begin();
  
        calculate_urb_setup();
@@ -4204,7 +3914,7 @@ fs_visitor::run_fs()
  
        emit_fb_writes();
  
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      if (shader_time_index >= 0)
           emit_shader_time_end();
  
        calculate_cfg();
@@ -4248,7 +3958,7 @@ fs_visitor::run_cs()
  
     setup_cs_payload();
  
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
        emit_shader_time_begin();
  
     emit_nir_code();
@@ -4258,7 +3968,7 @@ fs_visitor::run_cs()
  
     emit_cs_terminate();
  
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
        emit_shader_time_end();
  
     calculate_cfg();
@@ -4308,11 +4018,18 @@ brw_wm_fs_emit(struct brw_context *brw,
     if (unlikely(INTEL_DEBUG & DEBUG_WM))
        brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
  
+   int st_index8 = -1, st_index16 = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
+      st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
+   }
+
     /* Now the main event: Visit the shader IR and generate our FS IR for it.
      */
-   fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                prog, &fp->Base, 8);
-   if (!v.run_fs()) {
+   fs_visitor v(brw->intelScreen->compiler, brw,
+                mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+                prog, &fp->Base, 8, st_index8);
+   if (!v.run_fs(false /* do_rep_send */)) {
        if (prog) {
           prog->LinkStatus = false;
           ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -4325,21 +4042,18 @@ brw_wm_fs_emit(struct brw_context *brw,
     }
  
     cfg_t *simd16_cfg = NULL;
-   fs_visitor v2(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                 prog, &fp->Base, 16);
+   fs_visitor v2(brw->intelScreen->compiler, brw,
+                 mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+                 prog, &fp->Base, 16, st_index16);
     if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
        if (!v.simd16_unsupported) {
           /* Try a SIMD16 compile */
           v2.import_uniforms(&v);
-         if (!v2.run_fs()) {
-            perf_debug("SIMD16 shader failed to compile, falling back to "
-                       "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+         if (!v2.run_fs(brw->use_rep_send)) {
+            perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
           } else {
              simd16_cfg = v2.cfg;
           }
-      } else {
-         perf_debug("SIMD16 shader unsupported, falling back to "
-                    "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
        }
     }
  
@@ -4353,7 +4067,8 @@ brw_wm_fs_emit(struct brw_context *brw,
        prog_data->no_8 = false;
     }
  
-   fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base,
+   fs_generator g(brw->intelScreen->compiler, brw,
+                  mem_ctx, (void *) key, &prog_data->base,
                    &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
  
     if (unlikely(INTEL_DEBUG & DEBUG_WM)) {