i965: Fix sampler state pointer adjustment for nonconst samplers
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
index 45eab294a08766f24a30191c4b2d62de7f843f7f..bb12a26b926990b2cf3c8e2eef3e8dd76bd2038d 100644 (file)
 #include "brw_defines.h"
 #include "brw_eu.h"
 
-#include "glsl/ralloc.h"
+#include "util/ralloc.h"
 
 /***********************************************************************
  * Internal helper for constructing instructions
  */
 
 static void guess_execution_size(struct brw_compile *p,
-                                struct brw_instruction *insn,
+                                brw_inst *insn,
                                 struct brw_reg reg)
 {
    const struct brw_context *brw = p->brw;
@@ -97,7 +97,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
     * registers required for messages with EOT.
     */
    struct brw_context *brw = p->brw;
-   if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
+   if (brw->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
       reg->file = BRW_GENERAL_REGISTER_FILE;
       reg->nr += GEN7_MRF_HACK_START;
    }
@@ -160,8 +160,7 @@ brw_reg_type_to_hw_type(const struct brw_context *brw,
 }
 
 void
-brw_set_dest(struct brw_compile *p, struct brw_instruction *inst,
-            struct brw_reg dest)
+brw_set_dest(struct brw_compile *p, brw_inst *inst, struct brw_reg dest)
 {
    const struct brw_context *brw = p->brw;
 
@@ -225,8 +224,7 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction *inst,
 extern int reg_type_size[];
 
 static void
-validate_reg(const struct brw_context *brw,
-             struct brw_instruction *inst, struct brw_reg reg)
+validate_reg(const struct brw_context *brw, brw_inst *inst, struct brw_reg reg)
 {
    int hstride_for_reg[] = {0, 1, 2, 4};
    int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
@@ -312,12 +310,11 @@ is_compactable_immediate(unsigned imm)
 }
 
 void
-brw_set_src0(struct brw_compile *p, struct brw_instruction *inst,
-            struct brw_reg reg)
+brw_set_src0(struct brw_compile *p, brw_inst *inst, struct brw_reg reg)
 {
    struct brw_context *brw = p->brw;
 
-   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
@@ -453,14 +450,12 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *inst,
 
 
 void
-brw_set_src1(struct brw_compile *p,
-             struct brw_instruction *inst,
-             struct brw_reg reg)
+brw_set_src1(struct brw_compile *p, brw_inst *inst, struct brw_reg reg)
 {
    const struct brw_context *brw = p->brw;
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
-   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
@@ -535,7 +530,7 @@ brw_set_src1(struct brw_compile *p,
  */
 static void
 brw_set_message_descriptor(struct brw_compile *p,
-                          struct brw_instruction *inst,
+                          brw_inst *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
@@ -545,7 +540,19 @@ brw_set_message_descriptor(struct brw_compile *p,
    struct brw_context *brw = p->brw;
 
    brw_set_src1(p, inst, brw_imm_d(0));
-   brw_inst_set_sfid(brw, inst, sfid);
+
+   /* For indirect sends, `inst` will not be the SEND/SENDC instruction
+    * itself; instead, it will be a MOV/OR into the address register.
+    *
+    * In this case, we avoid setting the extended message descriptor bits:
+    * they belong on the later SEND/SENDC, and setting them here would
+    * clobber the conditional modifier bits of the MOV/OR.
+    */
+   unsigned opcode = brw_inst_opcode(brw, inst);
+   if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
+      brw_inst_set_sfid(brw, inst, sfid);
+   }
+
    brw_inst_set_mlen(brw, inst, msg_length);
    brw_inst_set_rlen(brw, inst, response_length);
    brw_inst_set_eot(brw, inst, end_of_thread);
@@ -556,7 +563,7 @@ brw_set_message_descriptor(struct brw_compile *p,
 }
 
 static void brw_set_math_message( struct brw_compile *p,
-                                 struct brw_instruction *inst,
+                                 brw_inst *inst,
                                  unsigned function,
                                  unsigned integer_type,
                                  bool low_precision,
@@ -603,7 +610,7 @@ static void brw_set_math_message( struct brw_compile *p,
 
 
 static void brw_set_ff_sync_message(struct brw_compile *p,
-                                   struct brw_instruction *insn,
+                                   brw_inst *insn,
                                    bool allocate,
                                    unsigned response_length,
                                    bool end_of_thread)
@@ -622,7 +629,7 @@ static void brw_set_ff_sync_message(struct brw_compile *p,
 }
 
 static void brw_set_urb_message( struct brw_compile *p,
-                                struct brw_instruction *insn,
+                                brw_inst *insn,
                                  enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
@@ -664,7 +671,7 @@ static void brw_set_urb_message( struct brw_compile *p,
 
 void
 brw_set_dp_write_message(struct brw_compile *p,
-                        struct brw_instruction *insn,
+                        brw_inst *insn,
                         unsigned binding_table_index,
                         unsigned msg_control,
                         unsigned msg_type,
@@ -705,7 +712,7 @@ brw_set_dp_write_message(struct brw_compile *p,
 
 void
 brw_set_dp_read_message(struct brw_compile *p,
-                       struct brw_instruction *insn,
+                       brw_inst *insn,
                        unsigned binding_table_index,
                        unsigned msg_control,
                        unsigned msg_type,
@@ -740,7 +747,7 @@ brw_set_dp_read_message(struct brw_compile *p,
 
 void
 brw_set_sampler_message(struct brw_compile *p,
-                        struct brw_instruction *inst,
+                        brw_inst *inst,
                         unsigned binding_table_index,
                         unsigned sampler,
                         unsigned msg_type,
@@ -765,9 +772,24 @@ brw_set_sampler_message(struct brw_compile *p,
    }
 }
 
+void brw_set_indirect_send_descriptor(struct brw_compile *p,
+                                      brw_inst *insn,
+                                      unsigned sfid,
+                                      struct brw_reg descriptor)
+{  /* Make insn take its message descriptor from a register (src1). */
+   /* Only a0.0, typed UD, may be used as SEND's descriptor operand. */
+   assert(descriptor.file == BRW_ARCHITECTURE_REGISTER_FILE);
+   assert(descriptor.type == BRW_REGISTER_TYPE_UD);
+   assert(descriptor.nr == BRW_ARF_ADDRESS);
+   assert(descriptor.subnr == 0);
+
+   brw_set_message_descriptor(p, insn, sfid, 0, 0, false, false);
+   brw_set_src1(p, insn, descriptor); /* overrides the imm src1 set just above */
+}
+
 static void
 gen7_set_dp_scratch_message(struct brw_compile *p,
-                            struct brw_instruction *inst,
+                            brw_inst *inst,
                             bool write,
                             bool dword,
                             bool invalidate_after_read,
@@ -791,16 +813,15 @@ gen7_set_dp_scratch_message(struct brw_compile *p,
 }
 
 #define next_insn brw_next_insn
-struct brw_instruction *
+brw_inst *
 brw_next_insn(struct brw_compile *p, unsigned opcode)
 {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    if (p->nr_insn + 1 > p->store_size) {
       p->store_size <<= 1;
-      p->store = reralloc(p->mem_ctx, p->store,
-                          struct brw_instruction, p->store_size);
+      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
    }
 
    p->next_insn_offset += 16;
@@ -811,24 +832,21 @@ brw_next_insn(struct brw_compile *p, unsigned opcode)
    return insn;
 }
 
-static struct brw_instruction *brw_alu1( struct brw_compile *p,
-                                        unsigned opcode,
-                                        struct brw_reg dest,
-                                        struct brw_reg src )
+static brw_inst *
+brw_alu1(struct brw_compile *p, unsigned opcode,
+         struct brw_reg dest, struct brw_reg src)
 {
-   struct brw_instruction *insn = next_insn(p, opcode);
+   brw_inst *insn = next_insn(p, opcode);
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src);
    return insn;
 }
 
-static struct brw_instruction *brw_alu2(struct brw_compile *p,
-                                       unsigned opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1 )
+static brw_inst *
+brw_alu2(struct brw_compile *p, unsigned opcode,
+         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
 {
-   struct brw_instruction *insn = next_insn(p, opcode);
+   brw_inst *insn = next_insn(p, opcode);
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, src1);
@@ -846,15 +864,12 @@ get_3src_subreg_nr(struct brw_reg reg)
    }
 }
 
-static struct brw_instruction *brw_alu3(struct brw_compile *p,
-                                       unsigned opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1,
-                                       struct brw_reg src2)
+static brw_inst *
+brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest,
+         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *inst = next_insn(p, opcode);
+   brw_inst *inst = next_insn(p, opcode);
 
    gen7_convert_mrf_to_grf(p, &dest);
 
@@ -940,7 +955,7 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
  * Convenience routines.
  */
 #define ALU1(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
+brw_inst *brw_##OP(struct brw_compile *p,              \
              struct brw_reg dest,                      \
              struct brw_reg src0)                      \
 {                                                      \
@@ -948,7 +963,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
 }
 
 #define ALU2(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
+brw_inst *brw_##OP(struct brw_compile *p,              \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1)                      \
@@ -957,7 +972,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
 }
 
 #define ALU3(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
+brw_inst *brw_##OP(struct brw_compile *p,              \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1,                      \
@@ -967,7 +982,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
 }
 
 #define ALU3F(OP)                                               \
-struct brw_instruction *brw_##OP(struct brw_compile *p,         \
+brw_inst *brw_##OP(struct brw_compile *p,         \
                                  struct brw_reg dest,           \
                                  struct brw_reg src0,           \
                                  struct brw_reg src1,           \
@@ -993,7 +1008,7 @@ void brw_##OP(struct brw_compile *p,                                             \
              struct brw_reg src)                                             \
 {                                                                            \
    struct brw_context *brw = p->brw;                                         \
-   struct brw_instruction *rnd, *add;                                        \
+   brw_inst *rnd, *add;                                                              \
    rnd = next_insn(p, BRW_OPCODE_##OP);                                              \
    brw_set_dest(p, rnd, dest);                                               \
    brw_set_src0(p, rnd, src);                                                \
@@ -1016,8 +1031,6 @@ ALU2(XOR)
 ALU2(SHR)
 ALU2(SHL)
 ALU2(ASR)
-ALU1(F32TO16)
-ALU1(F16TO32)
 ALU1(FRC)
 ALU1(RNDD)
 ALU2(MAC)
@@ -1045,10 +1058,9 @@ ROUND(RNDZ)
 ROUND(RNDE)
 
 
-struct brw_instruction *brw_ADD(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg src0,
-                               struct brw_reg src1)
+brw_inst *
+brw_ADD(struct brw_compile *p, struct brw_reg dest,
+        struct brw_reg src0, struct brw_reg src1)
 {
    /* 6.2.2: add */
    if (src0.type == BRW_REGISTER_TYPE_F ||
@@ -1068,10 +1080,9 @@ struct brw_instruction *brw_ADD(struct brw_compile *p,
    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
 }
 
-struct brw_instruction *brw_AVG(struct brw_compile *p,
-                                struct brw_reg dest,
-                                struct brw_reg src0,
-                                struct brw_reg src1)
+brw_inst *
+brw_AVG(struct brw_compile *p, struct brw_reg dest,
+        struct brw_reg src0, struct brw_reg src1)
 {
    assert(dest.type == src0.type);
    assert(src0.type == src1.type);
@@ -1084,16 +1095,15 @@ struct brw_instruction *brw_AVG(struct brw_compile *p,
    case BRW_REGISTER_TYPE_UD:
       break;
    default:
-      assert(!"Bad type for brw_AVG");
+      unreachable("Bad type for brw_AVG");
    }
 
    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
 }
 
-struct brw_instruction *brw_MUL(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg src0,
-                               struct brw_reg src1)
+brw_inst *
+brw_MUL(struct brw_compile *p, struct brw_reg dest,
+        struct brw_reg src0, struct brw_reg src1)
 {
    /* 6.32.38: mul */
    if (src0.type == BRW_REGISTER_TYPE_D ||
@@ -1125,10 +1135,60 @@ struct brw_instruction *brw_MUL(struct brw_compile *p,
    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
 }
 
+brw_inst *
+brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{  /* Emit F32TO16: native opcode on Gen7, MOV to an HF-typed dst on Gen8+. */
+   const struct brw_context *brw = p->brw;
+   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   if (align16) { /* the dst type required depends on the access mode */
+      assert(dst.type == BRW_REGISTER_TYPE_UD);
+   } else {
+      assert(dst.type == BRW_REGISTER_TYPE_W ||
+             dst.type == BRW_REGISTER_TYPE_UW ||
+             dst.type == BRW_REGISTER_TYPE_HF);
+   }
+
+   if (brw->gen >= 8) { /* Gen8+: emitted as a MOV, not BRW_OPCODE_F32TO16 */
+      if (align16) {
+         /* Emulate the Gen7 zeroing bug (see the comments in vec4_visitor's
+          * emit_pack_half_2x16 method): clear the whole UD dst first.
+          */
+         brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
+      }
+      return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+   } else { /* Gen7 only: use the dedicated opcode */
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+}
+
+brw_inst *
+brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{  /* Emit F16TO32: native opcode on Gen7, MOV from an HF-typed src on Gen8+. */
+   const struct brw_context *brw = p->brw;
+   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   if (align16) { /* the src type required depends on the access mode */
+      assert(src.type == BRW_REGISTER_TYPE_UD);
+   } else {
+      assert(src.type == BRW_REGISTER_TYPE_W ||
+             src.type == BRW_REGISTER_TYPE_UW ||
+             src.type == BRW_REGISTER_TYPE_HF);
+   }
+
+   if (brw->gen >= 8) { /* Gen8+: emitted as a MOV, not BRW_OPCODE_F16TO32 */
+      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
+   } else { /* Gen7 only: use the dedicated opcode */
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
+   }
+}
+
 
 void brw_NOP(struct brw_compile *p)
 {
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
    brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
    brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
    brw_set_src1(p, insn, brw_imm_ud(0x0));
@@ -1142,13 +1202,13 @@ void brw_NOP(struct brw_compile *p)
  * Comparisons, if/else/endif
  */
 
-struct brw_instruction *brw_JMPI(struct brw_compile *p,
-                                 struct brw_reg index,
-                                 unsigned predicate_control)
+brw_inst *
+brw_JMPI(struct brw_compile *p, struct brw_reg index,
+         unsigned predicate_control)
 {
    const struct brw_context *brw = p->brw;
    struct brw_reg ip = brw_ip_reg();
-   struct brw_instruction *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
+   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
 
    brw_inst_set_exec_size(brw, inst, BRW_EXECUTE_2);
    brw_inst_set_qtr_control(brw, inst, BRW_COMPRESSION_NONE);
@@ -1159,7 +1219,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
 }
 
 static void
-push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+push_if_stack(struct brw_compile *p, brw_inst *inst)
 {
    p->if_stack[p->if_stack_depth] = inst - p->store;
 
@@ -1171,7 +1231,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
    }
 }
 
-static struct brw_instruction *
+static brw_inst *
 pop_if_stack(struct brw_compile *p)
 {
    p->if_stack_depth--;
@@ -1179,7 +1239,7 @@ pop_if_stack(struct brw_compile *p)
 }
 
 static void
-push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
+push_loop_stack(struct brw_compile *p, brw_inst *inst)
 {
    if (p->loop_stack_array_size < p->loop_stack_depth) {
       p->loop_stack_array_size *= 2;
@@ -1194,7 +1254,7 @@ push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
    p->if_depth_in_loop[p->loop_stack_depth] = 0;
 }
 
-static struct brw_instruction *
+static brw_inst *
 get_inner_do_insn(struct brw_compile *p)
 {
    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
@@ -1213,11 +1273,11 @@ get_inner_do_insn(struct brw_compile *p)
  * When the matching 'endif' instruction is reached, the flags are
  * popped off.  If the stack is now empty, normal execution resumes.
  */
-struct brw_instruction *
+brw_inst *
 brw_IF(struct brw_compile *p, unsigned execute_size)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_IF);
 
@@ -1232,12 +1292,17 @@ brw_IF(struct brw_compile *p, unsigned execute_size)
       brw_inst_set_gen6_jump_count(brw, insn, 0);
       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
       brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-   } else {
+   } else if (brw->gen == 7) {
       brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
       brw_set_src1(p, insn, brw_imm_ud(0));
       brw_inst_set_jip(brw, insn, 0);
       brw_inst_set_uip(brw, insn, 0);
+   } else {
+      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+      brw_set_src0(p, insn, brw_imm_d(0));
+      brw_inst_set_jip(brw, insn, 0);
+      brw_inst_set_uip(brw, insn, 0);
    }
 
    brw_inst_set_exec_size(brw, insn, execute_size);
@@ -1255,12 +1320,12 @@ brw_IF(struct brw_compile *p, unsigned execute_size)
 /* This function is only used for gen6-style IF instructions with an
  * embedded comparison (conditional modifier).  It is not used on gen7.
  */
-struct brw_instruction *
-gen6_IF(struct brw_compile *p, uint32_t conditional,
+brw_inst *
+gen6_IF(struct brw_compile *p, enum brw_conditional_mod conditional,
        struct brw_reg src0, struct brw_reg src1)
 {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_IF);
 
@@ -1284,13 +1349,12 @@ gen6_IF(struct brw_compile *p, uint32_t conditional,
  */
 static void
 convert_IF_ELSE_to_ADD(struct brw_compile *p,
-                      struct brw_instruction *if_inst,
-                      struct brw_instruction *else_inst)
+                       brw_inst *if_inst, brw_inst *else_inst)
 {
    const struct brw_context *brw = p->brw;
 
    /* The next instruction (where the ENDIF would be, if it existed) */
-   struct brw_instruction *next_inst = &p->store[p->nr_insn];
+   brw_inst *next_inst = &p->store[p->nr_insn];
 
    assert(p->single_program_flow);
    assert(if_inst != NULL && brw_inst_opcode(brw, if_inst) == BRW_OPCODE_IF);
@@ -1326,9 +1390,7 @@ convert_IF_ELSE_to_ADD(struct brw_compile *p,
  */
 static void
 patch_IF_ELSE(struct brw_compile *p,
-             struct brw_instruction *if_inst,
-             struct brw_instruction *else_inst,
-             struct brw_instruction *endif_inst)
+              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
 {
    struct brw_context *brw = p->brw;
 
@@ -1351,12 +1413,7 @@ patch_IF_ELSE(struct brw_compile *p,
    assert(endif_inst != NULL);
    assert(else_inst == NULL || brw_inst_opcode(brw, else_inst) == BRW_OPCODE_ELSE);
 
-   unsigned br = 1;
-   /* Jump count is for 64bit data chunk each, so one 128bit instruction
-    * requires 2 chunks.
-    */
-   if (brw->gen >= 5)
-      br = 2;
+   unsigned br = brw_jump_scale(brw);
 
    assert(brw_inst_opcode(brw, endif_inst) == BRW_OPCODE_ENDIF);
    brw_inst_set_exec_size(brw, endif_inst, brw_inst_exec_size(brw, if_inst));
@@ -1409,6 +1466,12 @@ patch_IF_ELSE(struct brw_compile *p,
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
          brw_inst_set_uip(brw, if_inst, br * (endif_inst - if_inst));
          brw_inst_set_jip(brw, else_inst, br * (endif_inst - else_inst));
+         if (brw->gen >= 8) {
+            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
+             * should point to ENDIF.
+             */
+            brw_inst_set_uip(brw, else_inst, br * (endif_inst - else_inst));
+         }
       }
    }
 }
@@ -1417,7 +1480,7 @@ void
 brw_ELSE(struct brw_compile *p)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_ELSE);
 
@@ -1430,12 +1493,17 @@ brw_ELSE(struct brw_compile *p)
       brw_inst_set_gen6_jump_count(brw, insn, 0);
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   } else {
+   } else if (brw->gen == 7) {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_ud(0));
       brw_inst_set_jip(brw, insn, 0);
       brw_inst_set_uip(brw, insn, 0);
+   } else {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, brw_imm_d(0));
+      brw_inst_set_jip(brw, insn, 0);
+      brw_inst_set_uip(brw, insn, 0);
    }
 
    brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
@@ -1450,10 +1518,10 @@ void
 brw_ENDIF(struct brw_compile *p)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = NULL;
-   struct brw_instruction *else_inst = NULL;
-   struct brw_instruction *if_inst = NULL;
-   struct brw_instruction *tmp;
+   brw_inst *insn = NULL;
+   brw_inst *else_inst = NULL;
+   brw_inst *if_inst = NULL;
+   brw_inst *tmp;
    bool emit_endif = true;
 
    /* In single program flow mode, we can express IF and ELSE instructions
@@ -1502,10 +1570,12 @@ brw_ENDIF(struct brw_compile *p)
       brw_set_dest(p, insn, brw_imm_w(0));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   } else {
+   } else if (brw->gen == 7) {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_ud(0));
+   } else {
+      brw_set_src0(p, insn, brw_imm_d(0));
    }
 
    brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
@@ -1525,13 +1595,17 @@ brw_ENDIF(struct brw_compile *p)
    patch_IF_ELSE(p, if_inst, else_inst, insn);
 }
 
-struct brw_instruction *brw_BREAK(struct brw_compile *p)
+brw_inst *
+brw_BREAK(struct brw_compile *p)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_BREAK);
-   if (brw->gen >= 6) {
+   if (brw->gen >= 8) {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, brw_imm_d(0x0));
+   } else if (brw->gen >= 6) {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_d(0x0));
@@ -1543,53 +1617,51 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p)
                                   p->if_depth_in_loop[p->loop_stack_depth]);
    }
    brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
-   brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_8);
+   brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                   : BRW_EXECUTE_8);
 
    return insn;
 }
 
-struct brw_instruction *gen6_CONT(struct brw_compile *p)
+brw_inst *
+brw_CONT(struct brw_compile *p)
 {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_CONTINUE);
-   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
    brw_set_dest(p, insn, brw_ip_reg());
-   brw_set_src0(p, insn, brw_ip_reg());
-   brw_set_src1(p, insn, brw_imm_d(0x0));
-
-   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
-   brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_8);
-   return insn;
-}
+   if (brw->gen >= 8) {
+      brw_set_src0(p, insn, brw_imm_d(0x0));
+   } else {
+      brw_set_src0(p, insn, brw_ip_reg());
+      brw_set_src1(p, insn, brw_imm_d(0x0));
+   }
 
-struct brw_instruction *brw_CONT(struct brw_compile *p)
-{
-   const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
-   insn = next_insn(p, BRW_OPCODE_CONTINUE);
-   brw_set_dest(p, insn, brw_ip_reg());
-   brw_set_src0(p, insn, brw_ip_reg());
-   brw_set_src1(p, insn, brw_imm_d(0x0));
+   if (brw->gen < 6) {
+      brw_inst_set_gen4_pop_count(brw, insn,
+                                  p->if_depth_in_loop[p->loop_stack_depth]);
+   }
    brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
-   brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_8);
-   /* insn->header.mask_control = BRW_MASK_DISABLE; */
-   brw_inst_set_gen4_pop_count(brw, insn,
-                               p->if_depth_in_loop[p->loop_stack_depth]);
+   brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                   : BRW_EXECUTE_8);
    return insn;
 }
 
-struct brw_instruction *gen6_HALT(struct brw_compile *p)
+brw_inst *
+gen6_HALT(struct brw_compile *p)
 {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_HALT);
    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+   if (brw->gen >= 8) {
+      brw_set_src0(p, insn, brw_imm_d(0x0));
+   } else {
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+   }
 
    if (p->compressed) {
       brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_16);
@@ -1616,7 +1688,8 @@ struct brw_instruction *gen6_HALT(struct brw_compile *p)
  * For gen6, there's no more mask stack, so no need for DO.  WHILE
  * just points back to the first instruction of the loop.
  */
-struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
+brw_inst *
+brw_DO(struct brw_compile *p, unsigned execute_size)
 {
    struct brw_context *brw = p->brw;
 
@@ -1624,7 +1697,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
       push_loop_stack(p, &p->store[p->nr_insn]);
       return &p->store[p->nr_insn];
    } else {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);
 
       push_loop_stack(p, insn);
 
@@ -1650,12 +1723,14 @@ struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
  * nesting, since it can always just point to the end of the block/current loop.
  */
 static void
-brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
+brw_patch_break_cont(struct brw_compile *p, brw_inst *while_inst)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *do_inst = get_inner_do_insn(p);
-   struct brw_instruction *inst;
-   int br = (brw->gen == 5) ? 2 : 1;
+   brw_inst *do_inst = get_inner_do_insn(p);
+   brw_inst *inst;
+   unsigned br = brw_jump_scale(brw);
+
+   assert(brw->gen < 6);
 
    for (inst = while_inst - 1; inst != do_inst; inst--) {
       /* If the jump count is != 0, that means that this instruction has already
@@ -1672,35 +1747,35 @@ brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
    }
 }
 
-struct brw_instruction *brw_WHILE(struct brw_compile *p)
+brw_inst *
+brw_WHILE(struct brw_compile *p)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn, *do_insn;
-   unsigned br = 1;
+   brw_inst *insn, *do_insn;
+   unsigned br = brw_jump_scale(brw);
 
-   if (brw->gen >= 5)
-      br = 2;
-
-   if (brw->gen >= 7) {
-      insn = next_insn(p, BRW_OPCODE_WHILE);
-      do_insn = get_inner_do_insn(p);
-
-      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      brw_inst_set_jip(brw, insn, br * (do_insn - insn));
-
-      brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_8);
-   } else if (brw->gen == 6) {
+   if (brw->gen >= 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
       do_insn = get_inner_do_insn(p);
 
-      brw_set_dest(p, insn, brw_imm_w(0));
-      brw_inst_set_gen6_jump_count(brw, insn, br * (do_insn - insn));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      if (brw->gen >= 8) {
+         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src0(p, insn, brw_imm_d(0));
+         brw_inst_set_jip(brw, insn, br * (do_insn - insn));
+      } else if (brw->gen == 7) {
+         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src1(p, insn, brw_imm_ud(0));
+         brw_inst_set_jip(brw, insn, br * (do_insn - insn));
+      } else {
+         brw_set_dest(p, insn, brw_imm_w(0));
+         brw_inst_set_gen6_jump_count(brw, insn, br * (do_insn - insn));
+         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      }
 
-      brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_8);
+      brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                      : BRW_EXECUTE_8);
    } else {
       if (p->single_program_flow) {
         insn = next_insn(p, BRW_OPCODE_ADD);
@@ -1739,7 +1814,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p)
 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
+   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
    unsigned jmpi = 1;
 
    if (brw->gen >= 5)
@@ -1763,7 +1838,15 @@ void brw_CMP(struct brw_compile *p,
             struct brw_reg src1)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
+
+   if (brw->gen >= 8) {
+      /* The CMP instruction appears to behave erratically for floating point
+       * sources unless the destination type is also float.  Overriding it to
+       * match src0 makes it work in all cases.
+       */
+      dest.type = src0.type;
+   }
 
    brw_inst_set_cond_modifier(brw, insn, conditional);
    brw_set_dest(p, insn, dest);
@@ -1796,11 +1879,18 @@ void gen4_math(struct brw_compile *p,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
-              unsigned data_type,
               unsigned precision )
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+   unsigned data_type;
+   if (src.vstride == BRW_VERTICAL_STRIDE_0 &&
+       src.width == BRW_WIDTH_1 &&
+       src.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      data_type = BRW_MATH_DATA_SCALAR;
+   } else {
+      data_type = BRW_MATH_DATA_VECTOR;
+   }
 
    assert(brw->gen < 6);
 
@@ -1827,13 +1917,14 @@ void gen6_math(struct brw_compile *p,
               struct brw_reg src1)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
 
    assert(brw->gen >= 6);
 
    assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
           (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
-   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
+          (brw->gen >= 8 && src0.file == BRW_IMMEDIATE_VALUE));
 
    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
    if (brw->gen == 6) {
@@ -1846,12 +1937,14 @@ void gen6_math(struct brw_compile *p,
        function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
       assert(src0.type != BRW_REGISTER_TYPE_F);
       assert(src1.type != BRW_REGISTER_TYPE_F);
-      assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
+             (brw->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
    } else {
       assert(src0.type == BRW_REGISTER_TYPE_F);
       assert(src1.type == BRW_REGISTER_TYPE_F);
       if (function == BRW_MATH_FUNCTION_POW) {
-         assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+         assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
+                (brw->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
       } else {
          assert(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
                 src1.nr == BRW_ARF_NULL);
@@ -1927,7 +2020,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
 
    {
       struct brw_reg dest;
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
       int send_commit_msg;
       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);
@@ -2006,7 +2099,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
    if (brw->gen >= 6)
       offset /= 16;
 
-   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+   if (p->brw->gen >= 7) {
+      /* On gen 7 and above, we no longer have message registers and we can
+       * send from any register we want.  By using the destination register
+       * for the message, we guarantee that the implied message write won't
+       * accidentally overwrite anything.  This has been a problem because
+       * the MRF registers and source for the final FB write are both fixed
+       * and may overlap.
+       */
+      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
+   } else {
+      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+   }
    dest = retype(dest, BRW_REGISTER_TYPE_UW);
 
    if (num_regs == 1) {
@@ -2025,17 +2129,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
       /* set message header global offset field (reg 0, element 2) */
-      brw_MOV(p,
-             retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                 mrf.nr,
-                                 2), BRW_REGISTER_TYPE_UD),
-             brw_imm_ud(offset));
+      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
 
       brw_pop_insn_state(p);
    }
 
    {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
       assert(brw_inst_pred_control(brw, insn) == 0);
       brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
@@ -2067,7 +2167,7 @@ gen7_block_read_scratch(struct brw_compile *p,
                         unsigned offset)
 {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
    assert(brw_inst_pred_control(brw, insn) == BRW_PREDICATE_NONE);
 
    brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
@@ -2129,7 +2229,7 @@ void brw_oword_block_read(struct brw_compile *p,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));
 
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
    /* cast dest to a uword[8] vector */
    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
@@ -2158,8 +2258,8 @@ void brw_oword_block_read(struct brw_compile *p,
 
 void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
-                  unsigned msg_reg_nr,
-                  struct brw_reg src0,
+                  struct brw_reg payload,
+                  struct brw_reg implied_header,
                   unsigned msg_control,
                   unsigned binding_table_index,
                   unsigned msg_length,
@@ -2168,9 +2268,9 @@ void brw_fb_WRITE(struct brw_compile *p,
                   bool header_present)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
    unsigned msg_type;
-   struct brw_reg dest;
+   struct brw_reg dest, src0;
 
    if (dispatch_width == 16)
       dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
@@ -2186,11 +2286,13 @@ void brw_fb_WRITE(struct brw_compile *p,
 
    if (brw->gen >= 6) {
       /* headerless version, just submit color payload */
-      src0 = brw_message_reg(msg_reg_nr);
+      src0 = payload;
 
       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
    } else {
-      brw_inst_set_base_mrf(brw, insn, msg_reg_nr);
+      assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
+      brw_inst_set_base_mrf(brw, insn, payload.nr);
+      src0 = implied_header;
 
       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
    }
@@ -2230,7 +2332,7 @@ void brw_SAMPLE(struct brw_compile *p,
                unsigned return_format)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    if (msg_reg_nr != -1)
       gen6_resolve_implied_move(p, &src0, msg_reg_nr);
@@ -2269,6 +2371,53 @@ void brw_SAMPLE(struct brw_compile *p,
                            return_format);
 }
 
+/* Adjust the message header's sampler state pointer to
+ * select the correct group of 16 samplers.
+ *
+ * p             - compile context passed to every emit helper used here.
+ * header        - message header register; its element 3 is the
+ *                 destination of the adjusting ADD.
+ * sampler_index - sampler number, either an immediate value or a
+ *                 register whose element 0 is read.
+ * scratch       - register used (retyped to UD, one element) as a
+ *                 temporary in the non-immediate case only.
+ *
+ * The ADD always takes element 3 of g0 (brw_vec8_grf(0, 0)) as its base
+ * operand, not the current contents of header.  Nothing is emitted for an
+ * immediate index below 16, nor for a non-immediate index when the
+ * hardware is neither Haswell nor gen8+.
+ */
+void brw_adjust_sampler_state_pointer(struct brw_compile *p,
+                                      struct brw_reg header,
+                                      struct brw_reg sampler_index,
+                                      struct brw_reg scratch)
+{
+   /* The "Sampler Index" field can only store values between 0 and 15.
+    * However, we can add an offset to the "Sampler State Pointer"
+    * field, effectively selecting a different set of 16 samplers.
+    *
+    * The "Sampler State Pointer" needs to be aligned to a 32-byte
+    * offset, and each sampler state is only 16-bytes, so we can't
+    * exclusively use the offset - we have to use both.
+    */
+
+   struct brw_context *brw = p->brw;
+
+   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
+      const int sampler_state_size = 16; /* 16 bytes */
+      uint32_t sampler = sampler_index.dw1.ud;
+
+      if (sampler >= 16) {
+         assert(brw->is_haswell || brw->gen >= 8);
+         brw_ADD(p,
+                 get_element_ud(header, 3),
+                 get_element_ud(brw_vec8_grf(0, 0), 3),
+                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
+      }
+   } else {
+      /* Non-const sampler array indexing case.
+       *
+       * Without Haswell or gen8+ nothing is emitted and the header is
+       * left exactly as the caller prepared it.  Otherwise the offset
+       * that the immediate branch folds into a constant is computed at
+       * run time: (index & 0x0f0) equals 16 * (index / 16) for indices
+       * below 256, and the shift left by 4 multiplies that by the
+       * 16-byte sampler state size.
+       */
+      if (brw->gen < 8 && !brw->is_haswell) {
+         return;
+      }
+
+      struct brw_reg temp = vec1(retype(scratch, BRW_REGISTER_TYPE_UD));
+
+      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
+      brw_SHL(p, temp, temp, brw_imm_ud(4));
+      brw_ADD(p,
+              get_element_ud(header, 3),
+              get_element_ud(brw_vec8_grf(0, 0), 3),
+              temp);
+   }
+}
+
 /* All these variables are pretty confusing - we might be better off
  * using bitmasks and macros for this, in the old style.  Or perhaps
  * just having the caller instantiate the fields in dword3 itself.
@@ -2284,11 +2433,11 @@ void brw_urb_WRITE(struct brw_compile *p,
                   unsigned swizzle)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
-   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
+   if (brw->gen >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
       brw_push_insn_state(p);
       brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -2330,7 +2479,7 @@ brw_find_next_block_end(struct brw_compile *p, int start_offset)
    for (offset = next_offset(brw, store, start_offset);
         offset < p->next_insn_offset;
         offset = next_offset(brw, store, offset)) {
-      struct brw_instruction *insn = store + offset;
+      brw_inst *insn = store + offset;
 
       switch (brw_inst_opcode(brw, insn)) {
       case BRW_OPCODE_ENDIF:
@@ -2353,16 +2502,18 @@ brw_find_loop_end(struct brw_compile *p, int start_offset)
 {
    struct brw_context *brw = p->brw;
    int offset;
-   int scale = 8;
+   int scale = 16 / brw_jump_scale(brw);
    void *store = p->store;
 
+   assert(brw->gen >= 6);
+
    /* Always start after the instruction (such as a WHILE) we're trying to fix
     * up.
     */
    for (offset = next_offset(brw, store, start_offset);
         offset < p->next_insn_offset;
         offset = next_offset(brw, store, offset)) {
-      struct brw_instruction *insn = store + offset;
+      brw_inst *insn = store + offset;
 
       if (brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE) {
          int jip = brw->gen == 6 ? brw_inst_gen6_jump_count(brw, insn)
@@ -2383,7 +2534,8 @@ brw_set_uip_jip(struct brw_compile *p)
 {
    struct brw_context *brw = p->brw;
    int offset;
-   int scale = 8;
+   int br = brw_jump_scale(brw);
+   int scale = 16 / br;
    void *store = p->store;
 
    if (brw->gen < 6)
@@ -2391,7 +2543,7 @@ brw_set_uip_jip(struct brw_compile *p)
 
    for (offset = 0; offset < p->next_insn_offset;
         offset = next_offset(brw, store, offset)) {
-      struct brw_instruction *insn = store + offset;
+      brw_inst *insn = store + offset;
 
       if (brw_inst_cmpt_control(brw, insn)) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
@@ -2421,12 +2573,15 @@ brw_set_uip_jip(struct brw_compile *p)
          assert(brw_inst_jip(brw, insn) != 0);
         break;
 
-      case BRW_OPCODE_ENDIF:
-         if (block_end_offset == 0)
-            brw_inst_set_jip(brw, insn, 2);
+      case BRW_OPCODE_ENDIF: {
+         int32_t jump = (block_end_offset == 0) ?
+                        1 * br : (block_end_offset - offset) / scale;
+         if (brw->gen >= 7)
+            brw_inst_set_jip(brw, insn, jump);
          else
-            brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale);
+            brw_inst_set_gen6_jump_count(brw, insn, jump);
         break;
+      }
 
       case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
@@ -2461,7 +2616,7 @@ void brw_ff_sync(struct brw_compile *p,
                   bool eot)
 {
    struct brw_context *brw = p->brw;
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
@@ -2499,7 +2654,7 @@ brw_svb_write(struct brw_compile *p,
               unsigned binding_table_index,
               bool   send_commit_msg)
 {
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
@@ -2521,7 +2676,7 @@ brw_svb_write(struct brw_compile *p,
 
 static void
 brw_set_dp_untyped_atomic_message(struct brw_compile *p,
-                                  struct brw_instruction *insn,
+                                  brw_inst *insn,
                                   unsigned atomic_op,
                                   unsigned bind_table_index,
                                   unsigned msg_length,
@@ -2534,7 +2689,7 @@ brw_set_dp_untyped_atomic_message(struct brw_compile *p,
       atomic_op | /* Atomic Operation Type: BRW_AOP_* */
       (response_length ? 1 << 5 : 0); /* Return data expected */
 
-   if (brw->is_haswell) {
+   if (brw->gen >= 8 || brw->is_haswell) {
       brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                  msg_length, response_length,
                                  header_present, false);
@@ -2568,16 +2723,16 @@ brw_set_dp_untyped_atomic_message(struct brw_compile *p,
 void
 brw_untyped_atomic(struct brw_compile *p,
                    struct brw_reg dest,
-                   struct brw_reg mrf,
+                   struct brw_reg payload,
                    unsigned atomic_op,
                    unsigned bind_table_index,
                    unsigned msg_length,
                    unsigned response_length) {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
    brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
-   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UD));
    brw_set_src1(p, insn, brw_imm_d(0));
    brw_set_dp_untyped_atomic_message(
       p, insn, atomic_op, bind_table_index, msg_length, response_length,
@@ -2586,7 +2741,7 @@ brw_untyped_atomic(struct brw_compile *p,
 
 static void
 brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
-                                        struct brw_instruction *insn,
+                                        brw_inst *insn,
                                         unsigned bind_table_index,
                                         unsigned msg_length,
                                         unsigned response_length,
@@ -2597,7 +2752,7 @@ brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
       (brw_inst_exec_size(brw, insn) == BRW_EXECUTE_16 ? 16 : 8);
    const unsigned num_channels = response_length / (dispatch_width / 8);
 
-   if (brw->is_haswell) {
+   if (brw->gen >= 8 || brw->is_haswell) {
       brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                  msg_length, response_length,
                                  header_present, false);
@@ -2636,7 +2791,7 @@ brw_untyped_surface_read(struct brw_compile *p,
                          unsigned response_length)
 {
    const struct brw_context *brw = p->brw;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
    brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
    brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
@@ -2645,6 +2800,34 @@ brw_untyped_surface_read(struct brw_compile *p,
       brw_inst_access_mode(brw, insn) == BRW_ALIGN_1);
 }
 
+/**
+ * Emit a SEND instruction addressed to the pixel interpolator
+ * (GEN7_SFID_PIXEL_INTERPOLATOR).
+ *
+ * dest and mrf are installed as the instruction's destination and src0.
+ * msg_length and response_length are passed to the message descriptor,
+ * which is always built without a header.  noperspective, mode and data
+ * fill the descriptor's nopersp, message type and message data fields
+ * respectively.  The SIMD mode field is set when the instruction's
+ * execution size is BRW_EXECUTE_16, and the slot group is always 0.
+ */
+void
+brw_pixel_interpolator_query(struct brw_compile *p,
+                             struct brw_reg dest,
+                             struct brw_reg mrf,
+                             bool noperspective,
+                             unsigned mode,
+                             unsigned data,
+                             unsigned msg_length,
+                             unsigned response_length)
+{
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, dest);
+   brw_set_src0(p, insn, mrf);
+   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
+                              msg_length, response_length,
+                              false /* header is never present for PI */,
+                              false);
+
+   brw_inst_set_pi_simd_mode(
+         brw, insn, brw_inst_exec_size(brw, insn) == BRW_EXECUTE_16);
+   brw_inst_set_pi_slot_group(brw, insn, 0); /* zero unless 32/64px dispatch */
+   brw_inst_set_pi_nopersp(brw, insn, noperspective);
+   brw_inst_set_pi_message_type(brw, insn, mode);
+   brw_inst_set_pi_message_data(brw, insn, data);
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
@@ -2670,7 +2853,7 @@ void brw_shader_time_add(struct brw_compile *p,
    brw_push_insn_state(p);
    brw_set_default_access_mode(p, BRW_ALIGN_1);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_pop_insn_state(p);
 
    /* We use brw_vec1_reg and unmasked because we want to increment the given