i965: Refactor SIMD16-to-2xSIMD8 checks.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
index f2cacd17762f8d6ab31d75ca2961edd5740391b0..1ca79a943046f4568ab4336aab897c21d7290397 100644 (file)
@@ -1,8 +1,8 @@
 /*
  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ Intel funded Tungsten Graphics to
  develop this 3D driver.
+
  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:
+
  The above copyright notice and this permission notice (including the
  next paragraph) shall be included in all copies or substantial
  portions of the Software.
+
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
  **********************************************************************/
  /*
   * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Keith Whitwell <keithw@vmware.com>
   */
-     
+
 
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_eu.h"
 
-#include "glsl/ralloc.h"
+#include "util/ralloc.h"
 
 /***********************************************************************
  * Internal helper for constructing instructions
  */
 
 static void guess_execution_size(struct brw_compile *p,
-                                struct brw_instruction *insn,
+                                brw_inst *insn,
                                 struct brw_reg reg)
 {
-   if (reg.width == BRW_WIDTH_8 && p->compressed)
-      insn->header.execution_size = BRW_EXECUTE_16;
-   else
-      insn->header.execution_size = reg.width; /* note - definitions are compatible */
+   const struct brw_context *brw = p->brw;
+
+   if (reg.width == BRW_WIDTH_8 && p->compressed) {
+      brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_16);
+   } else {
+      /* Register width definitions are compatible with BRW_EXECUTE_* enums. */
+      brw_inst_set_exec_size(brw, insn, reg.width);
+   }
 }
 
 
@@ -61,10 +65,10 @@ static void guess_execution_size(struct brw_compile *p,
 void
 gen6_resolve_implied_move(struct brw_compile *p,
                          struct brw_reg *src,
-                         GLuint msg_reg_nr)
+                         unsigned msg_reg_nr)
 {
-   struct intel_context *intel = &p->brw->intel;
-   if (intel->gen < 6)
+   struct brw_context *brw = p->brw;
+   if (brw->gen < 6)
       return;
 
    if (src->file == BRW_MESSAGE_REGISTER_FILE)
@@ -72,8 +76,8 @@ gen6_resolve_implied_move(struct brw_compile *p,
 
    if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
       brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
       brw_pop_insn_state(p);
@@ -84,7 +88,7 @@ gen6_resolve_implied_move(struct brw_compile *p,
 static void
 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
 {
-   /* From the BSpec / ISA Reference / send - [DevIVB+]:
+   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
     * "The send with EOT should use register space R112-R127 for <src>. This is
     *  to enable loading of a new thread into the same slot while the message
     *  with EOT for current thread is pending dispatch."
@@ -92,80 +96,140 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
     * Since we're pretending to have 16 MRFs anyway, we may as well use the
     * registers required for messages with EOT.
     */
-   struct intel_context *intel = &p->brw->intel;
-   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
+   struct brw_context *brw = p->brw;
+   if (brw->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
       reg->file = BRW_GENERAL_REGISTER_FILE;
       reg->nr += GEN7_MRF_HACK_START;
    }
 }
 
+/**
+ * Convert a brw_reg_type enumeration value into the hardware representation.
+ *
+ * The hardware encoding may depend on whether the value is an immediate.
+ */
+unsigned
+brw_reg_type_to_hw_type(const struct brw_context *brw,
+                        enum brw_reg_type type, unsigned file)
+{
+   if (file == BRW_IMMEDIATE_VALUE) {
+      const static int imm_hw_types[] = {
+         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
+         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
+         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
+         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
+         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
+         [BRW_REGISTER_TYPE_UB] = -1,
+         [BRW_REGISTER_TYPE_B]  = -1,
+         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
+         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
+         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
+         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
+         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
+         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
+         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
+      };
+      assert(type < ARRAY_SIZE(imm_hw_types));
+      assert(imm_hw_types[type] != -1);
+      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
+      return imm_hw_types[type];
+   } else {
+      /* Non-immediate registers */
+      const static int hw_types[] = {
+         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
+         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
+         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
+         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
+         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
+         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
+         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
+         [BRW_REGISTER_TYPE_UV] = -1,
+         [BRW_REGISTER_TYPE_VF] = -1,
+         [BRW_REGISTER_TYPE_V]  = -1,
+         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
+         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
+         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
+         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
+      };
+      assert(type < ARRAY_SIZE(hw_types));
+      assert(hw_types[type] != -1);
+      assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
+      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
+      return hw_types[type];
+   }
+}
 
 void
-brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
-            struct brw_reg dest)
+brw_set_dest(struct brw_compile *p, brw_inst *inst, struct brw_reg dest)
 {
+   const struct brw_context *brw = p->brw;
+
    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.file != BRW_MESSAGE_REGISTER_FILE)
       assert(dest.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &dest);
 
-   insn->bits1.da1.dest_reg_file = dest.file;
-   insn->bits1.da1.dest_reg_type = dest.type;
-   insn->bits1.da1.dest_address_mode = dest.address_mode;
+   brw_inst_set_dst_reg_file(brw, inst, dest.file);
+   brw_inst_set_dst_reg_type(brw, inst, brw_reg_type_to_hw_type(brw, dest.type,
+                                                                dest.file));
+   brw_inst_set_dst_address_mode(brw, inst, dest.address_mode);
 
-   if (dest.address_mode == BRW_ADDRESS_DIRECT) {   
-      insn->bits1.da1.dest_reg_nr = dest.nr;
+   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+      brw_inst_set_dst_da_reg_nr(brw, inst, dest.nr);
 
-      if (insn->header.access_mode == BRW_ALIGN_1) {
-        insn->bits1.da1.dest_subreg_nr = dest.subnr;
+      if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+         brw_inst_set_dst_da1_subreg_nr(brw, inst, dest.subnr);
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
-        insn->bits1.da1.dest_horiz_stride = dest.hstride;
-      }
-      else {
-        insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
-        insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+         brw_inst_set_dst_hstride(brw, inst, dest.hstride);
+      } else {
+         brw_inst_set_dst_da16_subreg_nr(brw, inst, dest.subnr / 16);
+         brw_inst_set_da16_writemask(brw, inst, dest.dw1.bits.writemask);
+         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
+             dest.file == BRW_MESSAGE_REGISTER_FILE) {
+            assert(dest.dw1.bits.writemask != 0);
+         }
         /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
          *    Although Dst.HorzStride is a don't care for Align16, HW needs
          *    this to be programmed as "01".
          */
-        insn->bits1.da16.dest_horiz_stride = 1;
+         brw_inst_set_dst_hstride(brw, inst, 1);
       }
-   }
-   else {
-      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+   } else {
+      brw_inst_set_dst_ia_subreg_nr(brw, inst, dest.subnr);
 
       /* These are different sizes in align1 vs align16:
        */
-      if (insn->header.access_mode == BRW_ALIGN_1) {
-        insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+      if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+         brw_inst_set_dst_ia1_addr_imm(brw, inst,
+                                       dest.dw1.bits.indirect_offset);
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
-        insn->bits1.ia1.dest_horiz_stride = dest.hstride;
-      }
-      else {
-        insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         brw_inst_set_dst_hstride(brw, inst, dest.hstride);
+      } else {
+         brw_inst_set_dst_ia16_addr_imm(brw, inst,
+                                        dest.dw1.bits.indirect_offset);
         /* even ignored in da16, still need to set as '01' */
-        insn->bits1.ia16.dest_horiz_stride = 1;
+         brw_inst_set_dst_hstride(brw, inst, 1);
       }
    }
 
    /* NEW: Set the execution size based on dest.width and
-    * insn->compression_control:
+    * inst->compression_control:
     */
-   guess_execution_size(p, insn, dest);
+   guess_execution_size(p, inst, dest);
 }
 
 extern int reg_type_size[];
 
 static void
-validate_reg(struct brw_instruction *insn, struct brw_reg reg)
+validate_reg(const struct brw_context *brw, brw_inst *inst, struct brw_reg reg)
 {
-   int hstride_for_reg[] = {0, 1, 2, 4};
-   int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
-   int width_for_reg[] = {1, 2, 4, 8, 16};
-   int execsize_for_reg[] = {1, 2, 4, 8, 16};
+   const int hstride_for_reg[] = {0, 1, 2, 4};
+   const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32};
+   const int width_for_reg[] = {1, 2, 4, 8, 16};
+   const int execsize_for_reg[] = {1, 2, 4, 8, 16};
    int width, hstride, vstride, execsize;
 
    if (reg.file == BRW_IMMEDIATE_VALUE) {
@@ -174,8 +238,8 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
        * destination horiz stride has to be a word.
        */
       if (reg.type == BRW_REGISTER_TYPE_V) {
-        assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
-               reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
+         assert(hstride_for_reg[brw_inst_dst_hstride(brw, inst)] *
+                reg_type_size[brw_inst_dst_reg_type(brw, inst)] == 2);
       }
 
       return;
@@ -185,22 +249,22 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
        reg.file == BRW_ARF_NULL)
       return;
 
-   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
+   assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
    hstride = hstride_for_reg[reg.hstride];
 
    if (reg.vstride == 0xf) {
       vstride = -1;
    } else {
-      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
+      assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
       vstride = vstride_for_reg[reg.vstride];
    }
 
-   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
+   assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
    width = width_for_reg[reg.width];
 
-   assert(insn->header.execution_size >= 0 &&
-         insn->header.execution_size < Elements(execsize_for_reg));
-   execsize = execsize_for_reg[insn->header.execution_size];
+   assert(brw_inst_exec_size(brw, inst) >= 0 &&
+          brw_inst_exec_size(brw, inst) < ARRAY_SIZE(execsize_for_reg));
+   execsize = execsize_for_reg[brw_inst_exec_size(brw, inst)];
 
    /* Restrictions from 3.3.10: Register Region Restrictions. */
    /* 3. */
@@ -235,20 +299,28 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
    /* 10. Check destination issues. */
 }
 
+static bool
+is_compactable_immediate(unsigned imm)
+{
+   /* We get the low 12 bits as-is. */
+   imm &= ~0xfff;
+
+   /* We get one bit replicated through the top 20 bits. */
+   return imm == 0 || imm == 0xfffff000;
+}
+
 void
-brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
-            struct brw_reg reg)
+brw_set_src0(struct brw_compile *p, brw_inst *inst, struct brw_reg reg)
 {
    struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
 
-   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
 
-   if (intel->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
-                           insn->header.opcode == BRW_OPCODE_SENDC)) {
+   if (brw->gen >= 6 && (brw_inst_opcode(brw, inst) == BRW_OPCODE_SEND ||
+                         brw_inst_opcode(brw, inst) == BRW_OPCODE_SENDC)) {
       /* Any source modifiers or regions will be ignored, since this just
        * identifies the MRF/GRF to start reading the message contents from.
        * Check for some likely failures.
@@ -258,143 +330,192 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
    }
 
-   validate_reg(insn, reg);
+   validate_reg(brw, inst, reg);
 
-   insn->bits1.da1.src0_reg_file = reg.file;
-   insn->bits1.da1.src0_reg_type = reg.type;
-   insn->bits2.da1.src0_abs = reg.abs;
-   insn->bits2.da1.src0_negate = reg.negate;
-   insn->bits2.da1.src0_address_mode = reg.address_mode;
+   brw_inst_set_src0_reg_file(brw, inst, reg.file);
+   brw_inst_set_src0_reg_type(brw, inst,
+                              brw_reg_type_to_hw_type(brw, reg.type, reg.file));
+   brw_inst_set_src0_abs(brw, inst, reg.abs);
+   brw_inst_set_src0_negate(brw, inst, reg.negate);
+   brw_inst_set_src0_address_mode(brw, inst, reg.address_mode);
 
    if (reg.file == BRW_IMMEDIATE_VALUE) {
-      insn->bits3.ud = reg.dw1.ud;
-   
-      /* Required to set some fields in src1 as well:
+      brw_inst_set_imm_ud(brw, inst, reg.dw1.ud);
+
+      /* The Bspec's section titled "Non-present Operands" claims that if src0
+       * is an immediate that src1's type must be the same as that of src0.
+       *
+       * The SNB+ DataTypeIndex instruction compaction tables contain mappings
+       * that do not follow this rule. E.g., from the IVB/HSW table:
+       *
+       *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
+       *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
+       *
+       * And from the SNB table:
+       *
+       *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
+       *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
+       *
+       * Neither of these cause warnings from the simulator when used,
+       * compacted or otherwise. In fact, all compaction mappings that have an
+       * immediate in src0 use a:ud for src1.
+       *
+       * The GM45 instruction compaction tables do not contain mapped meanings
+       * so it's not clear whether it has the restriction. We'll assume it was
+       * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
        */
-      insn->bits1.da1.src1_reg_file = 0; /* arf */
-      insn->bits1.da1.src1_reg_type = reg.type;
-   }
-   else 
-   {
-      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
-        if (insn->header.access_mode == BRW_ALIGN_1) {
-           insn->bits2.da1.src0_subreg_nr = reg.subnr;
-           insn->bits2.da1.src0_reg_nr = reg.nr;
-        }
-        else {
-           insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
-           insn->bits2.da16.src0_reg_nr = reg.nr;
-        }
+      brw_inst_set_src1_reg_file(brw, inst, BRW_ARCHITECTURE_REGISTER_FILE);
+      if (brw->gen < 6) {
+         brw_inst_set_src1_reg_type(brw, inst,
+                                    brw_inst_src0_reg_type(brw, inst));
+      } else {
+         brw_inst_set_src1_reg_type(brw, inst, BRW_HW_REG_TYPE_UD);
       }
-      else {
-        insn->bits2.ia1.src0_subreg_nr = reg.subnr;
 
-        if (insn->header.access_mode == BRW_ALIGN_1) {
-           insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
-        }
-        else {
-           insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
-        }
+      /* Compacted instructions only have 12-bits (plus 1 for the other 20)
+       * for immediate values. Presumably the hardware engineers realized
+       * that the only useful floating-point value that could be represented
+       * in this format is 0.0, which can also be represented as a VF-typed
+       * immediate, so they gave us the previously mentioned mapping on IVB+.
+       *
+       * Strangely, we do have a mapping for imm:f in src1, so we don't need
+       * to do this there.
+       *
+       * If we see a 0.0:F, change the type to VF so that it can be compacted.
+       */
+      if (brw_inst_imm_ud(brw, inst) == 0x0 &&
+          brw_inst_src0_reg_type(brw, inst) == BRW_HW_REG_TYPE_F) {
+         brw_inst_set_src0_reg_type(brw, inst, BRW_HW_REG_IMM_TYPE_VF);
       }
 
-      if (insn->header.access_mode == BRW_ALIGN_1) {
-        if (reg.width == BRW_WIDTH_1 && 
-            insn->header.execution_size == BRW_EXECUTE_1) {
-           insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
-           insn->bits2.da1.src0_width = BRW_WIDTH_1;
-           insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+      /* There are no mappings for dst:d | i:d, so if the immediate is suitable
+       * set the types to :UD so the instruction can be compacted.
+       */
+      if (is_compactable_immediate(brw_inst_imm_ud(brw, inst)) &&
+          brw_inst_cond_modifier(brw, inst) == BRW_CONDITIONAL_NONE &&
+          brw_inst_src0_reg_type(brw, inst) == BRW_HW_REG_TYPE_D &&
+          brw_inst_dst_reg_type(brw, inst) == BRW_HW_REG_TYPE_D) {
+         brw_inst_set_src0_reg_type(brw, inst, BRW_HW_REG_TYPE_UD);
+         brw_inst_set_dst_reg_type(brw, inst, BRW_HW_REG_TYPE_UD);
+      }
+   } else {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         brw_inst_set_src0_da_reg_nr(brw, inst, reg.nr);
+         if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+             brw_inst_set_src0_da1_subreg_nr(brw, inst, reg.subnr);
+        } else {
+            brw_inst_set_src0_da16_subreg_nr(brw, inst, reg.subnr / 16);
         }
-        else {
-           insn->bits2.da1.src0_horiz_stride = reg.hstride;
-           insn->bits2.da1.src0_width = reg.width;
-           insn->bits2.da1.src0_vert_stride = reg.vstride;
+      } else {
+         brw_inst_set_src0_ia_subreg_nr(brw, inst, reg.subnr);
+
+         if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+            brw_inst_set_src0_ia1_addr_imm(brw, inst, reg.dw1.bits.indirect_offset);
+        } else {
+            brw_inst_set_src0_ia_subreg_nr(brw, inst, reg.dw1.bits.indirect_offset);
         }
       }
-      else {
-        insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
-        insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
-        insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
-        insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+      if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+        if (reg.width == BRW_WIDTH_1 &&
+             brw_inst_exec_size(brw, inst) == BRW_EXECUTE_1) {
+            brw_inst_set_src0_hstride(brw, inst, BRW_HORIZONTAL_STRIDE_0);
+            brw_inst_set_src0_width(brw, inst, BRW_WIDTH_1);
+            brw_inst_set_src0_vstride(brw, inst, BRW_VERTICAL_STRIDE_0);
+        } else {
+            brw_inst_set_src0_hstride(brw, inst, reg.hstride);
+            brw_inst_set_src0_width(brw, inst, reg.width);
+            brw_inst_set_src0_vstride(brw, inst, reg.vstride);
+        }
+      } else {
+         brw_inst_set_src0_da16_swiz_x(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
+         brw_inst_set_src0_da16_swiz_y(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
+         brw_inst_set_src0_da16_swiz_z(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
+         brw_inst_set_src0_da16_swiz_w(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
 
         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
-           insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+            brw_inst_set_src0_vstride(brw, inst, BRW_VERTICAL_STRIDE_4);
         else
-           insn->bits2.da16.src0_vert_stride = reg.vstride;
+            brw_inst_set_src0_vstride(brw, inst, reg.vstride);
       }
    }
 }
 
 
-void brw_set_src1(struct brw_compile *p,
-                 struct brw_instruction *insn,
-                 struct brw_reg reg)
+void
+brw_set_src1(struct brw_compile *p, brw_inst *inst, struct brw_reg reg)
 {
-   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+   const struct brw_context *brw = p->brw;
 
-   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
-   validate_reg(insn, reg);
+   validate_reg(brw, inst, reg);
 
-   insn->bits1.da1.src1_reg_file = reg.file;
-   insn->bits1.da1.src1_reg_type = reg.type;
-   insn->bits3.da1.src1_abs = reg.abs;
-   insn->bits3.da1.src1_negate = reg.negate;
+   brw_inst_set_src1_reg_file(brw, inst, reg.file);
+   brw_inst_set_src1_reg_type(brw, inst,
+                              brw_reg_type_to_hw_type(brw, reg.type, reg.file));
+   brw_inst_set_src1_abs(brw, inst, reg.abs);
+   brw_inst_set_src1_negate(brw, inst, reg.negate);
 
    /* Only src1 can be immediate in two-argument instructions.
     */
-   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+   assert(brw_inst_src0_reg_file(brw, inst) != BRW_IMMEDIATE_VALUE);
 
    if (reg.file == BRW_IMMEDIATE_VALUE) {
-      insn->bits3.ud = reg.dw1.ud;
-   }
-   else {
+      brw_inst_set_imm_ud(brw, inst, reg.dw1.ud);
+   } else {
       /* This is a hardware restriction, which may or may not be lifted
        * in the future:
        */
       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
       /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
-      if (insn->header.access_mode == BRW_ALIGN_1) {
-        insn->bits3.da1.src1_subreg_nr = reg.subnr;
-        insn->bits3.da1.src1_reg_nr = reg.nr;
-      }
-      else {
-        insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
-        insn->bits3.da16.src1_reg_nr = reg.nr;
+      brw_inst_set_src1_da_reg_nr(brw, inst, reg.nr);
+      if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+         brw_inst_set_src1_da1_subreg_nr(brw, inst, reg.subnr);
+      } else {
+         brw_inst_set_src1_da16_subreg_nr(brw, inst, reg.subnr / 16);
       }
 
-      if (insn->header.access_mode == BRW_ALIGN_1) {
-        if (reg.width == BRW_WIDTH_1 && 
-            insn->header.execution_size == BRW_EXECUTE_1) {
-           insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
-           insn->bits3.da1.src1_width = BRW_WIDTH_1;
-           insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
-        }
-        else {
-           insn->bits3.da1.src1_horiz_stride = reg.hstride;
-           insn->bits3.da1.src1_width = reg.width;
-           insn->bits3.da1.src1_vert_stride = reg.vstride;
+      if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) {
+        if (reg.width == BRW_WIDTH_1 &&
+             brw_inst_exec_size(brw, inst) == BRW_EXECUTE_1) {
+            brw_inst_set_src1_hstride(brw, inst, BRW_HORIZONTAL_STRIDE_0);
+            brw_inst_set_src1_width(brw, inst, BRW_WIDTH_1);
+            brw_inst_set_src1_vstride(brw, inst, BRW_VERTICAL_STRIDE_0);
+        } else {
+            brw_inst_set_src1_hstride(brw, inst, reg.hstride);
+            brw_inst_set_src1_width(brw, inst, reg.width);
+            brw_inst_set_src1_vstride(brw, inst, reg.vstride);
         }
-      }
-      else {
-        insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
-        insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
-        insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
-        insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+      } else {
+         brw_inst_set_src1_da16_swiz_x(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
+         brw_inst_set_src1_da16_swiz_y(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
+         brw_inst_set_src1_da16_swiz_z(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
+         brw_inst_set_src1_da16_swiz_w(brw, inst,
+            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
 
         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
-           insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+            brw_inst_set_src1_vstride(brw, inst, BRW_VERTICAL_STRIDE_4);
         else
-           insn->bits3.da16.src1_vert_stride = reg.vstride;
+            brw_inst_set_src1_vstride(brw, inst, reg.vstride);
       }
    }
 }
@@ -409,48 +530,46 @@ void brw_set_src1(struct brw_compile *p,
  */
 static void
 brw_set_message_descriptor(struct brw_compile *p,
-                          struct brw_instruction *inst,
+                          brw_inst *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
 {
-   struct intel_context *intel = &p->brw->intel;
+   struct brw_context *brw = p->brw;
 
    brw_set_src1(p, inst, brw_imm_d(0));
 
-   if (intel->gen >= 5) {
-      inst->bits3.generic_gen5.header_present = header_present;
-      inst->bits3.generic_gen5.response_length = response_length;
-      inst->bits3.generic_gen5.msg_length = msg_length;
-      inst->bits3.generic_gen5.end_of_thread = end_of_thread;
+   /* For indirect sends, `inst` will not be the SEND/SENDC instruction
+    * itself; instead, it will be a MOV/OR into the address register.
+    *
+    * In this case, we avoid setting the extended message descriptor bits,
+    * since they go on the later SEND/SENDC instead and if set here would
+    * instead clobber the conditionalmod bits.
+    */
+   unsigned opcode = brw_inst_opcode(brw, inst);
+   if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
+      brw_inst_set_sfid(brw, inst, sfid);
+   }
 
-      if (intel->gen >= 6) {
-        /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
-        inst->header.destreg__conditionalmod = sfid;
-      } else {
-        /* Set Extended Message Descriptor (ex_desc) */
-        inst->bits2.send_gen5.sfid = sfid;
-        inst->bits2.send_gen5.end_of_thread = end_of_thread;
-      }
-   } else {
-      inst->bits3.generic.response_length = response_length;
-      inst->bits3.generic.msg_length = msg_length;
-      inst->bits3.generic.msg_target = sfid;
-      inst->bits3.generic.end_of_thread = end_of_thread;
+   brw_inst_set_mlen(brw, inst, msg_length);
+   brw_inst_set_rlen(brw, inst, response_length);
+   brw_inst_set_eot(brw, inst, end_of_thread);
+
+   if (brw->gen >= 5) {
+      brw_inst_set_header_present(brw, inst, header_present);
    }
 }
 
 static void brw_set_math_message( struct brw_compile *p,
-                                 struct brw_instruction *insn,
-                                 GLuint function,
-                                 GLuint integer_type,
+                                 brw_inst *inst,
+                                 unsigned function,
+                                 unsigned integer_type,
                                  bool low_precision,
-                                 GLuint dataType )
+                                 unsigned dataType )
 {
    struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
    unsigned msg_length;
    unsigned response_length;
 
@@ -479,107 +598,100 @@ static void brw_set_math_message( struct brw_compile *p,
    }
 
 
-   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
+   brw_set_message_descriptor(p, inst, BRW_SFID_MATH,
                              msg_length, response_length, false, false);
-   if (intel->gen == 5) {
-      insn->bits3.math_gen5.function = function;
-      insn->bits3.math_gen5.int_type = integer_type;
-      insn->bits3.math_gen5.precision = low_precision;
-      insn->bits3.math_gen5.saturate = insn->header.saturate;
-      insn->bits3.math_gen5.data_type = dataType;
-      insn->bits3.math_gen5.snapshot = 0;
-   } else {
-      insn->bits3.math.function = function;
-      insn->bits3.math.int_type = integer_type;
-      insn->bits3.math.precision = low_precision;
-      insn->bits3.math.saturate = insn->header.saturate;
-      insn->bits3.math.data_type = dataType;
-   }
-   insn->header.saturate = 0;
+   brw_inst_set_math_msg_function(brw, inst, function);
+   brw_inst_set_math_msg_signed_int(brw, inst, integer_type);
+   brw_inst_set_math_msg_precision(brw, inst, low_precision);
+   brw_inst_set_math_msg_saturate(brw, inst, brw_inst_saturate(brw, inst));
+   brw_inst_set_math_msg_data_type(brw, inst, dataType);
+   brw_inst_set_saturate(brw, inst, 0);
 }
 
 
 static void brw_set_ff_sync_message(struct brw_compile *p,
-                                   struct brw_instruction *insn,
+                                   brw_inst *insn,
                                    bool allocate,
-                                   GLuint response_length,
+                                   unsigned response_length,
                                    bool end_of_thread)
 {
+   const struct brw_context *brw = p->brw;
+
    brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              1, response_length, true, end_of_thread);
-   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
-   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
-   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
-   insn->bits3.urb_gen5.allocate = allocate;
-   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
-   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
+   brw_inst_set_urb_opcode(brw, insn, 1); /* FF_SYNC */
+   brw_inst_set_urb_allocate(brw, insn, allocate);
+   /* The following fields are not used by FF_SYNC: */
+   brw_inst_set_urb_global_offset(brw, insn, 0);
+   brw_inst_set_urb_swizzle_control(brw, insn, 0);
+   brw_inst_set_urb_used(brw, insn, 0);
+   brw_inst_set_urb_complete(brw, insn, 0);
 }
 
 static void brw_set_urb_message( struct brw_compile *p,
-                                struct brw_instruction *insn,
-                                bool allocate,
-                                bool used,
-                                GLuint msg_length,
-                                GLuint response_length,
-                                bool end_of_thread,
-                                bool complete,
-                                GLuint offset,
-                                GLuint swizzle_control )
+                                brw_inst *insn,
+                                 enum brw_urb_write_flags flags,
+                                unsigned msg_length,
+                                unsigned response_length,
+                                unsigned offset,
+                                unsigned swizzle_control )
 {
    struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
+
+   assert(brw->gen < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
+   assert(brw->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
+   assert(brw->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
 
    brw_set_message_descriptor(p, insn, BRW_SFID_URB,
-                             msg_length, response_length, true, end_of_thread);
-   if (intel->gen == 7) {
-      insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
-      insn->bits3.urb_gen7.offset = offset;
-      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
-      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
-      /* per_slot_offset = 0 makes it ignore offsets in message header */
-      insn->bits3.urb_gen7.per_slot_offset = 0;
-      insn->bits3.urb_gen7.complete = complete;
-   } else if (intel->gen >= 5) {
-      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
-      insn->bits3.urb_gen5.offset = offset;
-      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
-      insn->bits3.urb_gen5.allocate = allocate;
-      insn->bits3.urb_gen5.used = used;        /* ? */
-      insn->bits3.urb_gen5.complete = complete;
+                             msg_length, response_length, true,
+                              flags & BRW_URB_WRITE_EOT);
+
+   if (flags & BRW_URB_WRITE_OWORD) {
+      assert(msg_length == 2); /* header + one OWORD of data */
+      brw_inst_set_urb_opcode(brw, insn, BRW_URB_OPCODE_WRITE_OWORD);
+   } else {
+      brw_inst_set_urb_opcode(brw, insn, BRW_URB_OPCODE_WRITE_HWORD);
+   }
+
+   brw_inst_set_urb_global_offset(brw, insn, offset);
+   brw_inst_set_urb_swizzle_control(brw, insn, swizzle_control);
+
+   if (brw->gen < 8) {
+      brw_inst_set_urb_complete(brw, insn, !!(flags & BRW_URB_WRITE_COMPLETE));
+   }
+
+   if (brw->gen < 7) {
+      brw_inst_set_urb_allocate(brw, insn, !!(flags & BRW_URB_WRITE_ALLOCATE));
+      brw_inst_set_urb_used(brw, insn, !(flags & BRW_URB_WRITE_UNUSED));
    } else {
-      insn->bits3.urb.opcode = 0;      /* ? */
-      insn->bits3.urb.offset = offset;
-      insn->bits3.urb.swizzle_control = swizzle_control;
-      insn->bits3.urb.allocate = allocate;
-      insn->bits3.urb.used = used;     /* ? */
-      insn->bits3.urb.complete = complete;
+      brw_inst_set_urb_per_slot_offset(brw, insn,
+         !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
    }
 }
 
 void
 brw_set_dp_write_message(struct brw_compile *p,
-                        struct brw_instruction *insn,
-                        GLuint binding_table_index,
-                        GLuint msg_control,
-                        GLuint msg_type,
-                        GLuint msg_length,
+                        brw_inst *insn,
+                        unsigned binding_table_index,
+                        unsigned msg_control,
+                        unsigned msg_type,
+                        unsigned msg_length,
                         bool header_present,
-                        GLuint last_render_target,
-                        GLuint response_length,
-                        GLuint end_of_thread,
-                        GLuint send_commit_msg)
+                        unsigned last_render_target,
+                        unsigned response_length,
+                        unsigned end_of_thread,
+                        unsigned send_commit_msg)
 {
    struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
    unsigned sfid;
 
-   if (intel->gen >= 7) {
+   if (brw->gen >= 7) {
       /* Use the Render Cache for RT writes; otherwise use the Data Cache */
       if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
       else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-   } else if (intel->gen == 6) {
+   } else if (brw->gen == 6) {
       /* Use the render cache for all write messages. */
       sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
    } else {
@@ -589,50 +701,32 @@ brw_set_dp_write_message(struct brw_compile *p,
    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);
 
-   if (intel->gen >= 7) {
-      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen7_dp.msg_control = msg_control;
-      insn->bits3.gen7_dp.last_render_target = last_render_target;
-      insn->bits3.gen7_dp.msg_type = msg_type;
-   } else if (intel->gen == 6) {
-      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen6_dp.msg_control = msg_control;
-      insn->bits3.gen6_dp.last_render_target = last_render_target;
-      insn->bits3.gen6_dp.msg_type = msg_type;
-      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
-   } else if (intel->gen == 5) {
-      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
-      insn->bits3.dp_write_gen5.msg_control = msg_control;
-      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
-      insn->bits3.dp_write_gen5.msg_type = msg_type;
-      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
-   } else {
-      insn->bits3.dp_write.binding_table_index = binding_table_index;
-      insn->bits3.dp_write.msg_control = msg_control;
-      insn->bits3.dp_write.last_render_target = last_render_target;
-      insn->bits3.dp_write.msg_type = msg_type;
-      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
+   brw_inst_set_binding_table_index(brw, insn, binding_table_index);
+   brw_inst_set_dp_write_msg_type(brw, insn, msg_type);
+   brw_inst_set_dp_write_msg_control(brw, insn, msg_control);
+   brw_inst_set_rt_last(brw, insn, last_render_target);
+   if (brw->gen < 7) {
+      brw_inst_set_dp_write_commit(brw, insn, send_commit_msg);
    }
 }
 
 void
 brw_set_dp_read_message(struct brw_compile *p,
-                       struct brw_instruction *insn,
-                       GLuint binding_table_index,
-                       GLuint msg_control,
-                       GLuint msg_type,
-                       GLuint target_cache,
-                       GLuint msg_length,
+                       brw_inst *insn,
+                       unsigned binding_table_index,
+                       unsigned msg_control,
+                       unsigned msg_type,
+                       unsigned target_cache,
+                       unsigned msg_length,
                         bool header_present,
-                       GLuint response_length)
+                       unsigned response_length)
 {
    struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
    unsigned sfid;
 
-   if (intel->gen >= 7) {
+   if (brw->gen >= 7) {
       sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-   } else if (intel->gen == 6) {
+   } else if (brw->gen == 6) {
       if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
       else
@@ -644,126 +738,115 @@ brw_set_dp_read_message(struct brw_compile *p,
    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, false);
 
-   if (intel->gen >= 7) {
-      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen7_dp.msg_control = msg_control;
-      insn->bits3.gen7_dp.last_render_target = 0;
-      insn->bits3.gen7_dp.msg_type = msg_type;
-   } else if (intel->gen == 6) {
-      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen6_dp.msg_control = msg_control;
-      insn->bits3.gen6_dp.last_render_target = 0;
-      insn->bits3.gen6_dp.msg_type = msg_type;
-      insn->bits3.gen6_dp.send_commit_msg = 0;
-   } else if (intel->gen == 5) {
-      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
-      insn->bits3.dp_read_gen5.msg_control = msg_control;
-      insn->bits3.dp_read_gen5.msg_type = msg_type;
-      insn->bits3.dp_read_gen5.target_cache = target_cache;
-   } else if (intel->is_g4x) {
-      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
-      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
-      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
-      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
-   } else {
-      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
-      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
-      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
-      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
-   }
+   brw_inst_set_binding_table_index(brw, insn, binding_table_index);
+   brw_inst_set_dp_read_msg_type(brw, insn, msg_type);
+   brw_inst_set_dp_read_msg_control(brw, insn, msg_control);
+   if (brw->gen < 6)
+      brw_inst_set_dp_read_target_cache(brw, insn, target_cache);
 }
 
 void
 brw_set_sampler_message(struct brw_compile *p,
-                        struct brw_instruction *insn,
-                        GLuint binding_table_index,
-                        GLuint sampler,
-                        GLuint msg_type,
-                        GLuint response_length,
-                        GLuint msg_length,
-                        GLuint header_present,
-                        GLuint simd_mode,
-                        GLuint return_format)
+                        brw_inst *inst,
+                        unsigned binding_table_index,
+                        unsigned sampler,
+                        unsigned msg_type,
+                        unsigned response_length,
+                        unsigned msg_length,
+                        unsigned header_present,
+                        unsigned simd_mode,
+                        unsigned return_format)
 {
    struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
 
-   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
+   brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);
 
-   if (intel->gen >= 7) {
-      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
-      insn->bits3.sampler_gen7.sampler = sampler;
-      insn->bits3.sampler_gen7.msg_type = msg_type;
-      insn->bits3.sampler_gen7.simd_mode = simd_mode;
-   } else if (intel->gen >= 5) {
-      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
-      insn->bits3.sampler_gen5.sampler = sampler;
-      insn->bits3.sampler_gen5.msg_type = msg_type;
-      insn->bits3.sampler_gen5.simd_mode = simd_mode;
-   } else if (intel->is_g4x) {
-      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
-      insn->bits3.sampler_g4x.sampler = sampler;
-      insn->bits3.sampler_g4x.msg_type = msg_type;
-   } else {
-      insn->bits3.sampler.binding_table_index = binding_table_index;
-      insn->bits3.sampler.sampler = sampler;
-      insn->bits3.sampler.msg_type = msg_type;
-      insn->bits3.sampler.return_format = return_format;
+   brw_inst_set_binding_table_index(brw, inst, binding_table_index);
+   brw_inst_set_sampler(brw, inst, sampler);
+   brw_inst_set_sampler_msg_type(brw, inst, msg_type);
+   if (brw->gen >= 5) {
+      brw_inst_set_sampler_simd_mode(brw, inst, simd_mode);
+   } else if (brw->gen == 4 && !brw->is_g4x) {
+      brw_inst_set_sampler_return_format(brw, inst, return_format);
    }
 }
 
+void brw_set_indirect_send_descriptor(struct brw_compile *p,
+                                      brw_inst *insn,
+                                      unsigned sfid,
+                                      struct brw_reg descriptor)
+{
+   /* Only a0.0 may be used as SEND's descriptor operand. */
+   assert(descriptor.file == BRW_ARCHITECTURE_REGISTER_FILE);
+   assert(descriptor.type == BRW_REGISTER_TYPE_UD);
+   assert(descriptor.nr == BRW_ARF_ADDRESS);
+   assert(descriptor.subnr == 0);
+
+   brw_set_message_descriptor(p, insn, sfid, 0, 0, false, false);
+   brw_set_src1(p, insn, descriptor);
+}
+
+static void
+gen7_set_dp_scratch_message(struct brw_compile *p,
+                            brw_inst *inst,
+                            bool write,
+                            bool dword,
+                            bool invalidate_after_read,
+                            unsigned num_regs,
+                            unsigned addr_offset,
+                            unsigned mlen,
+                            unsigned rlen,
+                            bool header_present)
+{
+   const struct brw_context *brw = p->brw;
+   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
+          (brw->gen >= 8 && num_regs == 8));
+   brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
+                              mlen, rlen, header_present, false);
+   brw_inst_set_dp_category(brw, inst, 1); /* Scratch Block Read/Write msgs */
+   brw_inst_set_scratch_read_write(brw, inst, write);
+   brw_inst_set_scratch_type(brw, inst, dword);
+   brw_inst_set_scratch_invalidate_after_read(brw, inst, invalidate_after_read);
+   brw_inst_set_scratch_block_size(brw, inst, ffs(num_regs) - 1);
+   brw_inst_set_scratch_addr_offset(brw, inst, addr_offset);
+}
 
 #define next_insn brw_next_insn
-struct brw_instruction *
-brw_next_insn(struct brw_compile *p, GLuint opcode)
+brw_inst *
+brw_next_insn(struct brw_compile *p, unsigned opcode)
 {
-   struct brw_instruction *insn;
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    if (p->nr_insn + 1 > p->store_size) {
-      if (0)
-         printf("incresing the store size to %d\n", p->store_size << 1);
       p->store_size <<= 1;
-      p->store = reralloc(p->mem_ctx, p->store,
-                          struct brw_instruction, p->store_size);
-      if (!p->store)
-         assert(!"realloc eu store memeory failed");
+      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
    }
 
    p->next_insn_offset += 16;
    insn = &p->store[p->nr_insn++];
    memcpy(insn, p->current, sizeof(*insn));
 
-   /* Reset this one-shot flag: 
-    */
-
-   if (p->current->header.destreg__conditionalmod) {
-      p->current->header.destreg__conditionalmod = 0;
-      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
-   }
-
-   insn->header.opcode = opcode;
+   brw_inst_set_opcode(brw, insn, opcode);
    return insn;
 }
 
-static struct brw_instruction *brw_alu1( struct brw_compile *p,
-                                        GLuint opcode,
-                                        struct brw_reg dest,
-                                        struct brw_reg src )
+static brw_inst *
+brw_alu1(struct brw_compile *p, unsigned opcode,
+         struct brw_reg dest, struct brw_reg src)
 {
-   struct brw_instruction *insn = next_insn(p, opcode);
+   brw_inst *insn = next_insn(p, opcode);
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src);
    return insn;
 }
 
-static struct brw_instruction *brw_alu2(struct brw_compile *p,
-                                       GLuint opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1 )
+static brw_inst *
+brw_alu2(struct brw_compile *p, unsigned opcode,
+         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
 {
-   struct brw_instruction *insn = next_insn(p, opcode);   
+   brw_inst *insn = next_insn(p, opcode);
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, src1);
@@ -781,19 +864,16 @@ get_3src_subreg_nr(struct brw_reg reg)
    }
 }
 
-static struct brw_instruction *brw_alu3(struct brw_compile *p,
-                                       GLuint opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1,
-                                       struct brw_reg src2)
+static brw_inst *
+brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest,
+         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn = next_insn(p, opcode);
+   struct brw_context *brw = p->brw;
+   brw_inst *inst = next_insn(p, opcode);
 
    gen7_convert_mrf_to_grf(p, &dest);
 
-   assert(insn->header.access_mode == BRW_ALIGN_16);
+   assert(brw_inst_access_mode(brw, inst) == BRW_ALIGN_16);
 
    assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
@@ -802,44 +882,49 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
    assert(dest.type == BRW_REGISTER_TYPE_F ||
           dest.type == BRW_REGISTER_TYPE_D ||
           dest.type == BRW_REGISTER_TYPE_UD);
-   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
-   insn->bits1.da3src.dest_reg_nr = dest.nr;
-   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
-   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
-   guess_execution_size(p, insn, dest);
+   if (brw->gen == 6) {
+      brw_inst_set_3src_dst_reg_file(brw, inst,
+                                     dest.file == BRW_MESSAGE_REGISTER_FILE);
+   }
+   brw_inst_set_3src_dst_reg_nr(brw, inst, dest.nr);
+   brw_inst_set_3src_dst_subreg_nr(brw, inst, dest.subnr / 16);
+   brw_inst_set_3src_dst_writemask(brw, inst, dest.dw1.bits.writemask);
+   guess_execution_size(p, inst, dest);
 
    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
    assert(src0.nr < 128);
-   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
-   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
-   insn->bits2.da3src.src0_reg_nr = src0.nr;
-   insn->bits1.da3src.src0_abs = src0.abs;
-   insn->bits1.da3src.src0_negate = src0.negate;
-   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;
+   brw_inst_set_3src_src0_swizzle(brw, inst, src0.dw1.bits.swizzle);
+   brw_inst_set_3src_src0_subreg_nr(brw, inst, get_3src_subreg_nr(src0));
+   brw_inst_set_3src_src0_reg_nr(brw, inst, src0.nr);
+   brw_inst_set_3src_src0_abs(brw, inst, src0.abs);
+   brw_inst_set_3src_src0_negate(brw, inst, src0.negate);
+   brw_inst_set_3src_src0_rep_ctrl(brw, inst,
+                                   src0.vstride == BRW_VERTICAL_STRIDE_0);
 
    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
    assert(src1.nr < 128);
-   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
-   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
-   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
-   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
-   insn->bits3.da3src.src1_reg_nr = src1.nr;
-   insn->bits1.da3src.src1_abs = src1.abs;
-   insn->bits1.da3src.src1_negate = src1.negate;
+   brw_inst_set_3src_src1_swizzle(brw, inst, src1.dw1.bits.swizzle);
+   brw_inst_set_3src_src1_subreg_nr(brw, inst, get_3src_subreg_nr(src1));
+   brw_inst_set_3src_src1_reg_nr(brw, inst, src1.nr);
+   brw_inst_set_3src_src1_abs(brw, inst, src1.abs);
+   brw_inst_set_3src_src1_negate(brw, inst, src1.negate);
+   brw_inst_set_3src_src1_rep_ctrl(brw, inst,
+                                   src1.vstride == BRW_VERTICAL_STRIDE_0);
 
    assert(src2.file == BRW_GENERAL_REGISTER_FILE);
    assert(src2.address_mode == BRW_ADDRESS_DIRECT);
    assert(src2.nr < 128);
-   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
-   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
-   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
-   insn->bits3.da3src.src2_reg_nr = src2.nr;
-   insn->bits1.da3src.src2_abs = src2.abs;
-   insn->bits1.da3src.src2_negate = src2.negate;
-
-   if (intel->gen >= 7) {
+   brw_inst_set_3src_src2_swizzle(brw, inst, src2.dw1.bits.swizzle);
+   brw_inst_set_3src_src2_subreg_nr(brw, inst, get_3src_subreg_nr(src2));
+   brw_inst_set_3src_src2_reg_nr(brw, inst, src2.nr);
+   brw_inst_set_3src_src2_abs(brw, inst, src2.abs);
+   brw_inst_set_3src_src2_negate(brw, inst, src2.negate);
+   brw_inst_set_3src_src2_rep_ctrl(brw, inst,
+                                   src2.vstride == BRW_VERTICAL_STRIDE_0);
+
+   if (brw->gen >= 7) {
       /* Set both the source and destination types based on dest.type,
        * ignoring the source register types.  The MAD and LRP emitters ensure
        * that all four types are float.  The BFE and BFI2 emitters, however,
@@ -848,21 +933,21 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
        */
       switch (dest.type) {
       case BRW_REGISTER_TYPE_F:
-         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
-         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
+         brw_inst_set_3src_src_type(brw, inst, BRW_3SRC_TYPE_F);
+         brw_inst_set_3src_dst_type(brw, inst, BRW_3SRC_TYPE_F);
          break;
       case BRW_REGISTER_TYPE_D:
-         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
-         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
+         brw_inst_set_3src_src_type(brw, inst, BRW_3SRC_TYPE_D);
+         brw_inst_set_3src_dst_type(brw, inst, BRW_3SRC_TYPE_D);
          break;
       case BRW_REGISTER_TYPE_UD:
-         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
-         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
+         brw_inst_set_3src_src_type(brw, inst, BRW_3SRC_TYPE_UD);
+         brw_inst_set_3src_dst_type(brw, inst, BRW_3SRC_TYPE_UD);
          break;
       }
    }
 
-   return insn;
+   return inst;
 }
 
 
@@ -870,7 +955,7 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
  * Convenience routines.
  */
 #define ALU1(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
+brw_inst *brw_##OP(struct brw_compile *p,              \
              struct brw_reg dest,                      \
              struct brw_reg src0)                      \
 {                                                      \
@@ -878,7 +963,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
 }
 
 #define ALU2(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
+brw_inst *brw_##OP(struct brw_compile *p,              \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1)                      \
@@ -887,7 +972,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
 }
 
 #define ALU3(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
+brw_inst *brw_##OP(struct brw_compile *p,              \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1,                      \
@@ -897,7 +982,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
 }
 
 #define ALU3F(OP)                                               \
-struct brw_instruction *brw_##OP(struct brw_compile *p,         \
+brw_inst *brw_##OP(struct brw_compile *p,         \
                                  struct brw_reg dest,           \
                                  struct brw_reg src0,           \
                                  struct brw_reg src1,           \
@@ -922,16 +1007,17 @@ void brw_##OP(struct brw_compile *p,                                           \
              struct brw_reg dest,                                            \
              struct brw_reg src)                                             \
 {                                                                            \
-   struct brw_instruction *rnd, *add;                                        \
+   struct brw_context *brw = p->brw;                                         \
+   brw_inst *rnd, *add;                                                              \
    rnd = next_insn(p, BRW_OPCODE_##OP);                                              \
    brw_set_dest(p, rnd, dest);                                               \
    brw_set_src0(p, rnd, src);                                                \
                                                                              \
-   if (p->brw->intel.gen < 6) {                                                      \
+   if (brw->gen < 6) {                                                       \
       /* turn on round-increments */                                         \
-      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;               \
+      brw_inst_set_cond_modifier(brw, rnd, BRW_CONDITIONAL_R);                \
       add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                         \
-      add->header.predicate_control = BRW_PREDICATE_NORMAL;                  \
+      brw_inst_set_pred_control(brw, add, BRW_PREDICATE_NORMAL);              \
    }                                                                         \
 }
 
@@ -944,11 +1030,7 @@ ALU2(OR)
 ALU2(XOR)
 ALU2(SHR)
 ALU2(SHL)
-ALU2(RSR)
-ALU2(RSL)
 ALU2(ASR)
-ALU1(F32TO16)
-ALU1(F16TO32)
 ALU1(FRC)
 ALU1(RNDD)
 ALU2(MAC)
@@ -958,7 +1040,6 @@ ALU2(DP4)
 ALU2(DPH)
 ALU2(DP3)
 ALU2(DP2)
-ALU2(LINE)
 ALU2(PLN)
 ALU3F(MAD)
 ALU3F(LRP)
@@ -969,15 +1050,16 @@ ALU3(BFI2)
 ALU1(FBH)
 ALU1(FBL)
 ALU1(CBIT)
+ALU2(ADDC)
+ALU2(SUBB)
 
 ROUND(RNDZ)
 ROUND(RNDE)
 
 
-struct brw_instruction *brw_ADD(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg src0,
-                               struct brw_reg src1)
+brw_inst *
+brw_ADD(struct brw_compile *p, struct brw_reg dest,
+        struct brw_reg src0, struct brw_reg src1)
 {
    /* 6.2.2: add */
    if (src0.type == BRW_REGISTER_TYPE_F ||
@@ -997,10 +1079,9 @@ struct brw_instruction *brw_ADD(struct brw_compile *p,
    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
 }
 
-struct brw_instruction *brw_AVG(struct brw_compile *p,
-                                struct brw_reg dest,
-                                struct brw_reg src0,
-                                struct brw_reg src1)
+brw_inst *
+brw_AVG(struct brw_compile *p, struct brw_reg dest,
+        struct brw_reg src0, struct brw_reg src1)
 {
    assert(dest.type == src0.type);
    assert(src0.type == src1.type);
@@ -1013,16 +1094,15 @@ struct brw_instruction *brw_AVG(struct brw_compile *p,
    case BRW_REGISTER_TYPE_UD:
       break;
    default:
-      assert(!"Bad type for brw_AVG");
+      unreachable("Bad type for brw_AVG");
    }
 
    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
 }
 
-struct brw_instruction *brw_MUL(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg src0,
-                               struct brw_reg src1)
+brw_inst *
+brw_MUL(struct brw_compile *p, struct brw_reg dest,
+        struct brw_reg src0, struct brw_reg src1)
 {
    /* 6.32.38: mul */
    if (src0.type == BRW_REGISTER_TYPE_D ||
@@ -1054,10 +1134,98 @@ struct brw_instruction *brw_MUL(struct brw_compile *p,
    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
 }
 
+brw_inst *
+brw_LINE(struct brw_compile *p, struct brw_reg dest,
+         struct brw_reg src0, struct brw_reg src1)
+{
+   src0.vstride = BRW_VERTICAL_STRIDE_0;
+   src0.width = BRW_WIDTH_1;
+   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
+   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
+}
+
+brw_inst *
+brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{
+   const struct brw_context *brw = p->brw;
+   const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   /* The F32TO16 instruction doesn't support 32-bit destination types in
+    * Align1 mode, and neither does the Gen8 implementation in terms of a
+    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
+    * an undocumented feature.
+    */
+   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+                                 (!align16 || brw->gen >= 8));
+   brw_inst *inst;
+
+   if (align16) {
+      assert(dst.type == BRW_REGISTER_TYPE_UD);
+   } else {
+      assert(dst.type == BRW_REGISTER_TYPE_UD ||
+             dst.type == BRW_REGISTER_TYPE_W ||
+             dst.type == BRW_REGISTER_TYPE_UW ||
+             dst.type == BRW_REGISTER_TYPE_HF);
+   }
+
+   brw_push_insn_state(p);
+
+   if (needs_zero_fill) {
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+   }
+
+   if (brw->gen >= 8) {
+      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+   } else {
+      assert(brw->gen == 7);
+      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+
+   if (needs_zero_fill) {
+      brw_inst_set_no_dd_clear(brw, inst, true);
+      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+      brw_inst_set_no_dd_check(brw, inst, true);
+   }
+
+   brw_pop_insn_state(p);
+   return inst;
+}
+
+brw_inst *
+brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{
+   const struct brw_context *brw = p->brw;
+   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   if (align16) {
+      assert(src.type == BRW_REGISTER_TYPE_UD);
+   } else {
+      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
+       *
+       *   Because this instruction does not have a 16-bit floating-point
+       *   type, the source data type must be Word (W). The destination type
+       *   must be F (Float).
+       */
+      if (src.type == BRW_REGISTER_TYPE_UD)
+         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
+
+      assert(src.type == BRW_REGISTER_TYPE_W ||
+             src.type == BRW_REGISTER_TYPE_UW ||
+             src.type == BRW_REGISTER_TYPE_HF);
+   }
+
+   if (brw->gen >= 8) {
+      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
+   } else {
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
+   }
+}
+
 
 void brw_NOP(struct brw_compile *p)
 {
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);   
+   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
    brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
    brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
    brw_set_src1(p, insn, brw_imm_ud(0x0));
@@ -1071,24 +1239,24 @@ void brw_NOP(struct brw_compile *p)
  * Comparisons, if/else/endif
  */
 
-struct brw_instruction *brw_JMPI(struct brw_compile *p, 
-                                 struct brw_reg dest,
-                                 struct brw_reg src0,
-                                 struct brw_reg src1)
+brw_inst *
+brw_JMPI(struct brw_compile *p, struct brw_reg index,
+         unsigned predicate_control)
 {
-   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
-
-   insn->header.execution_size = 1;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.mask_control = BRW_MASK_DISABLE;
+   const struct brw_context *brw = p->brw;
+   struct brw_reg ip = brw_ip_reg();
+   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
 
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   brw_inst_set_exec_size(brw, inst, BRW_EXECUTE_2);
+   brw_inst_set_qtr_control(brw, inst, BRW_COMPRESSION_NONE);
+   brw_inst_set_mask_control(brw, inst, BRW_MASK_DISABLE);
+   brw_inst_set_pred_control(brw, inst, predicate_control);
 
-   return insn;
+   return inst;
 }
 
 static void
-push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+push_if_stack(struct brw_compile *p, brw_inst *inst)
 {
    p->if_stack[p->if_stack_depth] = inst - p->store;
 
@@ -1100,7 +1268,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
    }
 }
 
-static struct brw_instruction *
+static brw_inst *
 pop_if_stack(struct brw_compile *p)
 {
    p->if_stack_depth--;
@@ -1108,7 +1276,7 @@ pop_if_stack(struct brw_compile *p)
 }
 
 static void
-push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
+push_loop_stack(struct brw_compile *p, brw_inst *inst)
 {
    if (p->loop_stack_array_size < p->loop_stack_depth) {
       p->loop_stack_array_size *= 2;
@@ -1123,7 +1291,7 @@ push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
    p->if_depth_in_loop[p->loop_stack_depth] = 0;
 }
 
-static struct brw_instruction *
+static brw_inst *
 get_inner_do_insn(struct brw_compile *p)
 {
    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
@@ -1142,41 +1310,44 @@ get_inner_do_insn(struct brw_compile *p)
  * When the matching 'endif' instruction is reached, the flags are
  * popped off.  If the stack is now empty, normal execution resumes.
  */
-struct brw_instruction *
-brw_IF(struct brw_compile *p, GLuint execute_size)
+brw_inst *
+brw_IF(struct brw_compile *p, unsigned execute_size)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_IF);
 
    /* Override the defaults for this instruction:
     */
-   if (intel->gen < 6) {
+   if (brw->gen < 6) {
       brw_set_dest(p, insn, brw_ip_reg());
       brw_set_src0(p, insn, brw_ip_reg());
       brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else if (intel->gen == 6) {
+   } else if (brw->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
-      insn->bits1.branch_gen6.jump_count = 0;
+      brw_inst_set_gen6_jump_count(brw, insn, 0);
       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
       brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-   } else {
+   } else if (brw->gen == 7) {
       brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      insn->bits3.break_cont.jip = 0;
-      insn->bits3.break_cont.uip = 0;
+      brw_set_src1(p, insn, brw_imm_w(0));
+      brw_inst_set_jip(brw, insn, 0);
+      brw_inst_set_uip(brw, insn, 0);
+   } else {
+      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+      brw_set_src0(p, insn, brw_imm_d(0));
+      brw_inst_set_jip(brw, insn, 0);
+      brw_inst_set_uip(brw, insn, 0);
    }
 
-   insn->header.execution_size = execute_size;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
-   insn->header.mask_control = BRW_MASK_ENABLE;
-   if (!p->single_program_flow)
-      insn->header.thread_control = BRW_THREAD_SWITCH;
-
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   brw_inst_set_exec_size(brw, insn, execute_size);
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+   brw_inst_set_pred_control(brw, insn, BRW_PREDICATE_NORMAL);
+   brw_inst_set_mask_control(brw, insn, BRW_MASK_ENABLE);
+   if (!p->single_program_flow && brw->gen < 6)
+      brw_inst_set_thread_control(brw, insn, BRW_THREAD_SWITCH);
 
    push_if_stack(p, insn);
    p->if_depth_in_loop[p->loop_stack_depth]++;
@@ -1186,30 +1357,25 @@ brw_IF(struct brw_compile *p, GLuint execute_size)
 /* This function is only used for gen6-style IF instructions with an
  * embedded comparison (conditional modifier).  It is not used on gen7.
  */
-struct brw_instruction *
-gen6_IF(struct brw_compile *p, uint32_t conditional,
+brw_inst *
+gen6_IF(struct brw_compile *p, enum brw_conditional_mod conditional,
        struct brw_reg src0, struct brw_reg src1)
 {
-   struct brw_instruction *insn;
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_IF);
 
    brw_set_dest(p, insn, brw_imm_w(0));
-   if (p->compressed) {
-      insn->header.execution_size = BRW_EXECUTE_16;
-   } else {
-      insn->header.execution_size = BRW_EXECUTE_8;
-   }
-   insn->bits1.branch_gen6.jump_count = 0;
+   brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                   : BRW_EXECUTE_8);
+   brw_inst_set_gen6_jump_count(brw, insn, 0);
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, src1);
 
-   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
-   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
-   insn->header.destreg__conditionalmod = conditional;
-
-   if (!p->single_program_flow)
-      insn->header.thread_control = BRW_THREAD_SWITCH;
+   assert(brw_inst_qtr_control(brw, insn) == BRW_COMPRESSION_NONE);
+   assert(brw_inst_pred_control(brw, insn) == BRW_PREDICATE_NONE);
+   brw_inst_set_cond_modifier(brw, insn, conditional);
 
    push_if_stack(p, insn);
    return insn;
@@ -1220,16 +1386,17 @@ gen6_IF(struct brw_compile *p, uint32_t conditional,
  */
 static void
 convert_IF_ELSE_to_ADD(struct brw_compile *p,
-                      struct brw_instruction *if_inst,
-                      struct brw_instruction *else_inst)
+                       brw_inst *if_inst, brw_inst *else_inst)
 {
+   const struct brw_context *brw = p->brw;
+
    /* The next instruction (where the ENDIF would be, if it existed) */
-   struct brw_instruction *next_inst = &p->store[p->nr_insn];
+   brw_inst *next_inst = &p->store[p->nr_insn];
 
    assert(p->single_program_flow);
-   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
-   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
-   assert(if_inst->header.execution_size == BRW_EXECUTE_1);
+   assert(if_inst != NULL && brw_inst_opcode(brw, if_inst) == BRW_OPCODE_IF);
+   assert(else_inst == NULL || brw_inst_opcode(brw, else_inst) == BRW_OPCODE_ELSE);
+   assert(brw_inst_exec_size(brw, if_inst) == BRW_EXECUTE_1);
 
    /* Convert IF to an ADD instruction that moves the instruction pointer
     * to the first instruction of the ELSE block.  If there is no ELSE
@@ -1239,19 +1406,19 @@ convert_IF_ELSE_to_ADD(struct brw_compile *p,
     * stack operations, and if we're currently executing, we just want to
     * continue normally.
     */
-   if_inst->header.opcode = BRW_OPCODE_ADD;
-   if_inst->header.predicate_inverse = 1;
+   brw_inst_set_opcode(brw, if_inst, BRW_OPCODE_ADD);
+   brw_inst_set_pred_inv(brw, if_inst, true);
 
    if (else_inst != NULL) {
       /* Convert ELSE to an ADD instruction that points where the ENDIF
        * would be.
        */
-      else_inst->header.opcode = BRW_OPCODE_ADD;
+      brw_inst_set_opcode(brw, else_inst, BRW_OPCODE_ADD);
 
-      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
-      else_inst->bits3.ud = (next_inst - else_inst) * 16;
+      brw_inst_set_imm_ud(brw, if_inst, (else_inst - if_inst + 1) * 16);
+      brw_inst_set_imm_ud(brw, else_inst, (next_inst - else_inst) * 16);
    } else {
-      if_inst->bits3.ud = (next_inst - if_inst) * 16;
+      brw_inst_set_imm_ud(brw, if_inst, (next_inst - if_inst) * 16);
    }
 }
 
@@ -1260,11 +1427,9 @@ convert_IF_ELSE_to_ADD(struct brw_compile *p,
  */
 static void
 patch_IF_ELSE(struct brw_compile *p,
-             struct brw_instruction *if_inst,
-             struct brw_instruction *else_inst,
-             struct brw_instruction *endif_inst)
+              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
 {
-   struct intel_context *intel = &p->brw->intel;
+   struct brw_context *brw = p->brw;
 
    /* We shouldn't be patching IF and ELSE instructions in single program flow
     * mode when gen < 6, because in single program flow mode on those
@@ -1278,69 +1443,72 @@ patch_IF_ELSE(struct brw_compile *p,
     * instructions to conditional ADDs.  So we do patch IF and ELSE
     * instructions in single program flow mode on those platforms.
     */
-   if (intel->gen < 6)
+   if (brw->gen < 6)
       assert(!p->single_program_flow);
 
-   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+   assert(if_inst != NULL && brw_inst_opcode(brw, if_inst) == BRW_OPCODE_IF);
    assert(endif_inst != NULL);
-   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+   assert(else_inst == NULL || brw_inst_opcode(brw, else_inst) == BRW_OPCODE_ELSE);
 
-   unsigned br = 1;
-   /* Jump count is for 64bit data chunk each, so one 128bit instruction
-    * requires 2 chunks.
-    */
-   if (intel->gen >= 5)
-      br = 2;
+   unsigned br = brw_jump_scale(brw);
 
-   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
-   endif_inst->header.execution_size = if_inst->header.execution_size;
+   assert(brw_inst_opcode(brw, endif_inst) == BRW_OPCODE_ENDIF);
+   brw_inst_set_exec_size(brw, endif_inst, brw_inst_exec_size(brw, if_inst));
 
    if (else_inst == NULL) {
       /* Patch IF -> ENDIF */
-      if (intel->gen < 6) {
+      if (brw->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
-        if_inst->header.opcode = BRW_OPCODE_IFF;
-        if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
-        if_inst->bits3.if_else.pop_count = 0;
-        if_inst->bits3.if_else.pad0 = 0;
-      } else if (intel->gen == 6) {
+         brw_inst_set_opcode(brw, if_inst, BRW_OPCODE_IFF);
+         brw_inst_set_gen4_jump_count(brw, if_inst,
+                                      br * (endif_inst - if_inst + 1));
+         brw_inst_set_gen4_pop_count(brw, if_inst, 0);
+      } else if (brw->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
-        if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
+         brw_inst_set_gen6_jump_count(brw, if_inst, br*(endif_inst - if_inst));
       } else {
-        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
-        if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
+         brw_inst_set_uip(brw, if_inst, br * (endif_inst - if_inst));
+         brw_inst_set_jip(brw, if_inst, br * (endif_inst - if_inst));
       }
    } else {
-      else_inst->header.execution_size = if_inst->header.execution_size;
+      brw_inst_set_exec_size(brw, else_inst, brw_inst_exec_size(brw, if_inst));
 
       /* Patch IF -> ELSE */
-      if (intel->gen < 6) {
-        if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
-        if_inst->bits3.if_else.pop_count = 0;
-        if_inst->bits3.if_else.pad0 = 0;
-      } else if (intel->gen == 6) {
-        if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
+      if (brw->gen < 6) {
+         brw_inst_set_gen4_jump_count(brw, if_inst,
+                                      br * (else_inst - if_inst));
+         brw_inst_set_gen4_pop_count(brw, if_inst, 0);
+      } else if (brw->gen == 6) {
+         brw_inst_set_gen6_jump_count(brw, if_inst,
+                                      br * (else_inst - if_inst + 1));
       }
 
       /* Patch ELSE -> ENDIF */
-      if (intel->gen < 6) {
+      if (brw->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
-        else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
-        else_inst->bits3.if_else.pop_count = 1;
-        else_inst->bits3.if_else.pad0 = 0;
-      } else if (intel->gen == 6) {
+         brw_inst_set_gen4_jump_count(brw, else_inst,
+                                      br * (endif_inst - else_inst + 1));
+         brw_inst_set_gen4_pop_count(brw, else_inst, 1);
+      } else if (brw->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
-        else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
+         brw_inst_set_gen6_jump_count(brw, else_inst,
+                                      br * (endif_inst - else_inst));
       } else {
         /* The IF instruction's JIP should point just past the ELSE */
-        if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
+         brw_inst_set_jip(brw, if_inst, br * (else_inst - if_inst + 1));
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
-        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
-        else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
+         brw_inst_set_uip(brw, if_inst, br * (endif_inst - if_inst));
+         brw_inst_set_jip(brw, else_inst, br * (endif_inst - else_inst));
+         if (brw->gen >= 8) {
+            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
+             * should point to ENDIF.
+             */
+            brw_inst_set_uip(brw, else_inst, br * (endif_inst - else_inst));
+         }
       }
    }
 }
@@ -1348,32 +1516,37 @@ patch_IF_ELSE(struct brw_compile *p,
 void
 brw_ELSE(struct brw_compile *p)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_ELSE);
 
-   if (intel->gen < 6) {
+   if (brw->gen < 6) {
       brw_set_dest(p, insn, brw_ip_reg());
       brw_set_src0(p, insn, brw_ip_reg());
       brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else if (intel->gen == 6) {
+   } else if (brw->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
-      insn->bits1.branch_gen6.jump_count = 0;
+      brw_inst_set_gen6_jump_count(brw, insn, 0);
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   } else {
+   } else if (brw->gen == 7) {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      insn->bits3.break_cont.jip = 0;
-      insn->bits3.break_cont.uip = 0;
+      brw_set_src1(p, insn, brw_imm_w(0));
+      brw_inst_set_jip(brw, insn, 0);
+      brw_inst_set_uip(brw, insn, 0);
+   } else {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, brw_imm_d(0));
+      brw_inst_set_jip(brw, insn, 0);
+      brw_inst_set_uip(brw, insn, 0);
    }
 
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.mask_control = BRW_MASK_ENABLE;
-   if (!p->single_program_flow)
-      insn->header.thread_control = BRW_THREAD_SWITCH;
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+   brw_inst_set_mask_control(brw, insn, BRW_MASK_ENABLE);
+   if (!p->single_program_flow && brw->gen < 6)
+      brw_inst_set_thread_control(brw, insn, BRW_THREAD_SWITCH);
 
    push_if_stack(p, insn);
 }
@@ -1381,11 +1554,11 @@ brw_ELSE(struct brw_compile *p)
 void
 brw_ENDIF(struct brw_compile *p)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn = NULL;
-   struct brw_instruction *else_inst = NULL;
-   struct brw_instruction *if_inst = NULL;
-   struct brw_instruction *tmp;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn = NULL;
+   brw_inst *else_inst = NULL;
+   brw_inst *if_inst = NULL;
+   brw_inst *tmp;
    bool emit_endif = true;
 
    /* In single program flow mode, we can express IF and ELSE instructions
@@ -1400,7 +1573,7 @@ brw_ENDIF(struct brw_compile *p)
     * instructions to conditional ADDs.  So we only do this trick on Gen4 and
     * Gen5.
     */
-   if (intel->gen < 6 && p->single_program_flow)
+   if (brw->gen < 6 && p->single_program_flow)
       emit_endif = false;
 
    /*
@@ -1414,7 +1587,7 @@ brw_ENDIF(struct brw_compile *p)
    /* Pop the IF and (optional) ELSE instructions from the stack */
    p->if_depth_in_loop[p->loop_stack_depth]--;
    tmp = pop_if_stack(p);
-   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
+   if (brw_inst_opcode(brw, tmp) == BRW_OPCODE_ELSE) {
       else_inst = tmp;
       tmp = pop_if_stack(p);
    }
@@ -1426,44 +1599,50 @@ brw_ENDIF(struct brw_compile *p)
       return;
    }
 
-   if (intel->gen < 6) {
+   if (brw->gen < 6) {
       brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
       brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
       brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else if (intel->gen == 6) {
+   } else if (brw->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   } else {
+   } else if (brw->gen == 7) {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
+      brw_set_src1(p, insn, brw_imm_w(0));
+   } else {
+      brw_set_src0(p, insn, brw_imm_d(0));
    }
 
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.mask_control = BRW_MASK_ENABLE;
-   insn->header.thread_control = BRW_THREAD_SWITCH;
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+   brw_inst_set_mask_control(brw, insn, BRW_MASK_ENABLE);
+   if (brw->gen < 6)
+      brw_inst_set_thread_control(brw, insn, BRW_THREAD_SWITCH);
 
    /* Also pop item off the stack in the endif instruction: */
-   if (intel->gen < 6) {
-      insn->bits3.if_else.jump_count = 0;
-      insn->bits3.if_else.pop_count = 1;
-      insn->bits3.if_else.pad0 = 0;
-   } else if (intel->gen == 6) {
-      insn->bits1.branch_gen6.jump_count = 2;
+   if (brw->gen < 6) {
+      brw_inst_set_gen4_jump_count(brw, insn, 0);
+      brw_inst_set_gen4_pop_count(brw, insn, 1);
+   } else if (brw->gen == 6) {
+      brw_inst_set_gen6_jump_count(brw, insn, 2);
    } else {
-      insn->bits3.break_cont.jip = 2;
+      brw_inst_set_jip(brw, insn, 2);
    }
    patch_IF_ELSE(p, if_inst, else_inst, insn);
 }
 
-struct brw_instruction *brw_BREAK(struct brw_compile *p)
+brw_inst *
+brw_BREAK(struct brw_compile *p)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_BREAK);
-   if (intel->gen >= 6) {
+   if (brw->gen >= 8) {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, brw_imm_d(0x0));
+   } else if (brw->gen >= 6) {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_d(0x0));
@@ -1471,60 +1650,61 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p)
       brw_set_dest(p, insn, brw_ip_reg());
       brw_set_src0(p, insn, brw_ip_reg());
       brw_set_src1(p, insn, brw_imm_d(0x0));
-      insn->bits3.if_else.pad0 = 0;
-      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
+      brw_inst_set_gen4_pop_count(brw, insn,
+                                  p->if_depth_in_loop[p->loop_stack_depth]);
    }
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+   brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                   : BRW_EXECUTE_8);
 
    return insn;
 }
 
-struct brw_instruction *gen6_CONT(struct brw_compile *p)
+brw_inst *
+brw_CONT(struct brw_compile *p)
 {
-   struct brw_instruction *insn;
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_CONTINUE);
-   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
    brw_set_dest(p, insn, brw_ip_reg());
-   brw_set_src0(p, insn, brw_ip_reg());
-   brw_set_src1(p, insn, brw_imm_d(0x0));
-
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
-   return insn;
-}
+   if (brw->gen >= 8) {
+      brw_set_src0(p, insn, brw_imm_d(0x0));
+   } else {
+      brw_set_src0(p, insn, brw_ip_reg());
+      brw_set_src1(p, insn, brw_imm_d(0x0));
+   }
 
-struct brw_instruction *brw_CONT(struct brw_compile *p)
-{
-   struct brw_instruction *insn;
-   insn = next_insn(p, BRW_OPCODE_CONTINUE);
-   brw_set_dest(p, insn, brw_ip_reg());
-   brw_set_src0(p, insn, brw_ip_reg());
-   brw_set_src1(p, insn, brw_imm_d(0x0));
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
-   /* insn->header.mask_control = BRW_MASK_DISABLE; */
-   insn->bits3.if_else.pad0 = 0;
-   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
+   if (brw->gen < 6) {
+      brw_inst_set_gen4_pop_count(brw, insn,
+                                  p->if_depth_in_loop[p->loop_stack_depth]);
+   }
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+   brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                   : BRW_EXECUTE_8);
    return insn;
 }
 
-struct brw_instruction *gen6_HALT(struct brw_compile *p)
+brw_inst *
+gen6_HALT(struct brw_compile *p)
 {
-   struct brw_instruction *insn;
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_HALT);
    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+   if (brw->gen >= 8) {
+      brw_set_src0(p, insn, brw_imm_d(0x0));
+   } else {
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+   }
 
    if (p->compressed) {
-      insn->header.execution_size = BRW_EXECUTE_16;
+      brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_16);
    } else {
-      insn->header.compression_control = BRW_COMPRESSION_NONE;
-      insn->header.execution_size = BRW_EXECUTE_8;
+      brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+      brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_8);
    }
    return insn;
 }
@@ -1545,15 +1725,16 @@ struct brw_instruction *gen6_HALT(struct brw_compile *p)
  * For gen6, there's no more mask stack, so no need for DO.  WHILE
  * just points back to the first instruction of the loop.
  */
-struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+brw_inst *
+brw_DO(struct brw_compile *p, unsigned execute_size)
 {
-   struct intel_context *intel = &p->brw->intel;
+   struct brw_context *brw = p->brw;
 
-   if (intel->gen >= 6 || p->single_program_flow) {
+   if (brw->gen >= 6 || p->single_program_flow) {
       push_loop_stack(p, &p->store[p->nr_insn]);
       return &p->store[p->nr_insn];
    } else {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);
 
       push_loop_stack(p, insn);
 
@@ -1563,11 +1744,9 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
       brw_set_src0(p, insn, brw_null_reg());
       brw_set_src1(p, insn, brw_null_reg());
 
-      insn->header.compression_control = BRW_COMPRESSION_NONE;
-      insn->header.execution_size = execute_size;
-      insn->header.predicate_control = BRW_PREDICATE_NONE;
-      /* insn->header.mask_control = BRW_MASK_ENABLE; */
-      /* insn->header.mask_control = BRW_MASK_DISABLE; */
+      brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+      brw_inst_set_exec_size(brw, insn, execute_size);
+      brw_inst_set_pred_control(brw, insn, BRW_PREDICATE_NONE);
 
       return insn;
    }
@@ -1581,57 +1760,59 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
  * nesting, since it can always just point to the end of the block/current loop.
  */
 static void
-brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
+brw_patch_break_cont(struct brw_compile *p, brw_inst *while_inst)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *do_inst = get_inner_do_insn(p);
-   struct brw_instruction *inst;
-   int br = (intel->gen == 5) ? 2 : 1;
+   struct brw_context *brw = p->brw;
+   brw_inst *do_inst = get_inner_do_insn(p);
+   brw_inst *inst;
+   unsigned br = brw_jump_scale(brw);
+
+   assert(brw->gen < 6);
 
    for (inst = while_inst - 1; inst != do_inst; inst--) {
       /* If the jump count is != 0, that means that this instruction has already
        * been patched because it's part of a loop inside of the one we're
        * patching.
        */
-      if (inst->header.opcode == BRW_OPCODE_BREAK &&
-         inst->bits3.if_else.jump_count == 0) {
-        inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
-      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
-                inst->bits3.if_else.jump_count == 0) {
-        inst->bits3.if_else.jump_count = br * (while_inst - inst);
+      if (brw_inst_opcode(brw, inst) == BRW_OPCODE_BREAK &&
+          brw_inst_gen4_jump_count(brw, inst) == 0) {
+         brw_inst_set_gen4_jump_count(brw, inst, br*((while_inst - inst) + 1));
+      } else if (brw_inst_opcode(brw, inst) == BRW_OPCODE_CONTINUE &&
+                 brw_inst_gen4_jump_count(brw, inst) == 0) {
+         brw_inst_set_gen4_jump_count(brw, inst, br * (while_inst - inst));
       }
    }
 }
 
-struct brw_instruction *brw_WHILE(struct brw_compile *p)
+brw_inst *
+brw_WHILE(struct brw_compile *p)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn, *do_insn;
-   GLuint br = 1;
-
-   if (intel->gen >= 5)
-      br = 2;
-
-   if (intel->gen >= 7) {
-      insn = next_insn(p, BRW_OPCODE_WHILE);
-      do_insn = get_inner_do_insn(p);
-
-      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      insn->bits3.break_cont.jip = br * (do_insn - insn);
+   struct brw_context *brw = p->brw;
+   brw_inst *insn, *do_insn;
+   unsigned br = brw_jump_scale(brw);
 
-      insn->header.execution_size = BRW_EXECUTE_8;
-   } else if (intel->gen == 6) {
+   if (brw->gen >= 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
       do_insn = get_inner_do_insn(p);
 
-      brw_set_dest(p, insn, brw_imm_w(0));
-      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      if (brw->gen >= 8) {
+         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src0(p, insn, brw_imm_d(0));
+         brw_inst_set_jip(brw, insn, br * (do_insn - insn));
+      } else if (brw->gen == 7) {
+         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src1(p, insn, brw_imm_w(0));
+         brw_inst_set_jip(brw, insn, br * (do_insn - insn));
+      } else {
+         brw_set_dest(p, insn, brw_imm_w(0));
+         brw_inst_set_gen6_jump_count(brw, insn, br * (do_insn - insn));
+         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      }
 
-      insn->header.execution_size = BRW_EXECUTE_8;
+      brw_inst_set_exec_size(brw, insn, p->compressed ? BRW_EXECUTE_16
+                                                      : BRW_EXECUTE_8);
    } else {
       if (p->single_program_flow) {
         insn = next_insn(p, BRW_OPCODE_ADD);
@@ -1640,84 +1821,67 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p)
         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
-        insn->header.execution_size = BRW_EXECUTE_1;
+         brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_1);
       } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
          do_insn = get_inner_do_insn(p);
 
-        assert(do_insn->header.opcode == BRW_OPCODE_DO);
+         assert(brw_inst_opcode(brw, do_insn) == BRW_OPCODE_DO);
 
         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));
 
-        insn->header.execution_size = do_insn->header.execution_size;
-        insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
-        insn->bits3.if_else.pop_count = 0;
-        insn->bits3.if_else.pad0 = 0;
+         brw_inst_set_exec_size(brw, insn, brw_inst_exec_size(brw, do_insn));
+         brw_inst_set_gen4_jump_count(brw, insn, br * (do_insn - insn + 1));
+         brw_inst_set_gen4_pop_count(brw, insn, 0);
 
         brw_patch_break_cont(p, insn);
       }
    }
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
 
    p->loop_stack_depth--;
 
    return insn;
 }
 
-
 /* FORWARD JUMPS:
  */
 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
-   GLuint jmpi = 1;
+   struct brw_context *brw = p->brw;
+   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
+   unsigned jmpi = 1;
 
-   if (intel->gen >= 5)
+   if (brw->gen >= 5)
       jmpi = 2;
 
-   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
-   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+   assert(brw_inst_opcode(brw, jmp_insn) == BRW_OPCODE_JMPI);
+   assert(brw_inst_src1_reg_file(brw, jmp_insn) == BRW_IMMEDIATE_VALUE);
 
-   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
+   brw_inst_set_gen4_jump_count(brw, jmp_insn,
+                                jmpi * (p->nr_insn - jmp_insn_idx - 1));
 }
 
-
-
 /* To integrate with the above, it makes sense that the comparison
  * instruction should populate the flag register.  It might be simpler
  * just to use the flag reg for most WM tasks?
  */
 void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
-            GLuint conditional,
+            unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+   struct brw_context *brw = p->brw;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
 
-   insn->header.destreg__conditionalmod = conditional;
+   brw_inst_set_cond_modifier(brw, insn, conditional);
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, src1);
 
-/*    guess_execution_size(insn, src0); */
-
-
-   /* Make it so that future instructions will use the computed flag
-    * value until brw_set_predicate_control_flag_value() is called
-    * again.  
-    */
-   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
-       dest.nr == 0) {
-      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
-      p->flag_value = 0xff;
-   }
-
    /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
     * page says:
     *    "Any CMP instruction with a null destination must use a {switch}."
@@ -1725,121 +1889,72 @@ void brw_CMP(struct brw_compile *p,
     * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
     * mentioned on their work-arounds pages.
     */
-   if (intel->gen == 7) {
+   if (brw->gen == 7) {
       if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
           dest.nr == BRW_ARF_NULL) {
-         insn->header.thread_control = BRW_THREAD_SWITCH;
+         brw_inst_set_thread_control(brw, insn, BRW_THREAD_SWITCH);
       }
    }
 }
 
-/* Issue 'wait' instruction for n1, host could program MMIO
-   to wake up thread. */
-void brw_WAIT (struct brw_compile *p)
-{
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
-   struct brw_reg src = brw_notification_1_reg();
-
-   brw_set_dest(p, insn, src);
-   brw_set_src0(p, insn, src);
-   brw_set_src1(p, insn, brw_null_reg());
-   insn->header.execution_size = 0; /* must */
-   insn->header.predicate_control = 0;
-   insn->header.compression_control = 0;
-}
-
-
 /***********************************************************************
  * Helpers for the various SEND message types:
  */
 
 /** Extended math function, float[8].
  */
-void brw_math( struct brw_compile *p,
+void gen4_math(struct brw_compile *p,
               struct brw_reg dest,
-              GLuint function,
-              GLuint msg_reg_nr,
+              unsigned function,
+              unsigned msg_reg_nr,
               struct brw_reg src,
-              GLuint data_type,
-              GLuint precision )
+              unsigned precision )
 {
-   struct intel_context *intel = &p->brw->intel;
-
-   if (intel->gen >= 6) {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
-
-      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
-             (intel->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
-      assert(src.file == BRW_GENERAL_REGISTER_FILE);
-
-      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-      if (intel->gen == 6)
-        assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
-
-      /* Source modifiers are ignored for extended math instructions on Gen6. */
-      if (intel->gen == 6) {
-        assert(!src.negate);
-        assert(!src.abs);
-      }
-
-      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
-         function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
-         function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
-        assert(src.type != BRW_REGISTER_TYPE_F);
-      } else {
-        assert(src.type == BRW_REGISTER_TYPE_F);
-      }
-
-      /* Math is the same ISA format as other opcodes, except that CondModifier
-       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
-       */
-      insn->header.destreg__conditionalmod = function;
-
-      brw_set_dest(p, insn, dest);
-      brw_set_src0(p, insn, src);
-      brw_set_src1(p, insn, brw_null_reg());
+   struct brw_context *brw = p->brw;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+   unsigned data_type;
+   if (has_scalar_region(src)) {
+      data_type = BRW_MATH_DATA_SCALAR;
    } else {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      data_type = BRW_MATH_DATA_VECTOR;
+   }
 
-      /* Example code doesn't set predicate_control for send
-       * instructions.
-       */
-      insn->header.predicate_control = 0;
-      insn->header.destreg__conditionalmod = msg_reg_nr;
+   assert(brw->gen < 6);
 
-      brw_set_dest(p, insn, dest);
-      brw_set_src0(p, insn, src);
-      brw_set_math_message(p,
-                          insn,
-                          function,
-                          src.type == BRW_REGISTER_TYPE_D,
-                          precision,
-                          data_type);
-   }
+   /* Example code doesn't set predicate_control for send
+    * instructions.
+    */
+   brw_inst_set_pred_control(brw, insn, 0);
+   brw_inst_set_base_mrf(brw, insn, msg_reg_nr);
+
+   brw_set_dest(p, insn, dest);
+   brw_set_src0(p, insn, src);
+   brw_set_math_message(p,
+                        insn,
+                        function,
+                        src.type == BRW_REGISTER_TYPE_D,
+                        precision,
+                        data_type);
 }
 
-/** Extended math function, float[8].
- */
-void brw_math2(struct brw_compile *p,
+void gen6_math(struct brw_compile *p,
               struct brw_reg dest,
-              GLuint function,
+              unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
-
-   assert(intel->gen >= 6);
-   (void) intel;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
 
+   assert(brw->gen >= 6);
 
    assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
-          (intel->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
-   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
-   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
+   assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
+          (brw->gen >= 8 && src0.file == BRW_IMMEDIATE_VALUE));
 
    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-   if (intel->gen == 6) {
+   if (brw->gen == 6) {
       assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
       assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
    }
@@ -1849,23 +1964,29 @@ void brw_math2(struct brw_compile *p,
        function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
       assert(src0.type != BRW_REGISTER_TYPE_F);
       assert(src1.type != BRW_REGISTER_TYPE_F);
+      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
+             (brw->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
    } else {
       assert(src0.type == BRW_REGISTER_TYPE_F);
       assert(src1.type == BRW_REGISTER_TYPE_F);
+      if (function == BRW_MATH_FUNCTION_POW) {
+         assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
+                (brw->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
+      } else {
+         assert(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                src1.nr == BRW_ARF_NULL);
+      }
    }
 
    /* Source modifiers are ignored for extended math instructions on Gen6. */
-   if (intel->gen == 6) {
+   if (brw->gen == 6) {
       assert(!src0.negate);
       assert(!src0.abs);
       assert(!src1.negate);
       assert(!src1.abs);
    }
 
-   /* Math is the same ISA format as other opcodes, except that CondModifier
-    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
-    */
-   insn->header.destreg__conditionalmod = function;
+   brw_inst_set_math_function(brw, insn, function);
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
@@ -1883,13 +2004,13 @@ void brw_math2(struct brw_compile *p,
 void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
-                                  GLuint offset)
+                                  unsigned offset)
 {
-   struct intel_context *intel = &p->brw->intel;
+   struct brw_context *brw = p->brw;
    uint32_t msg_control, msg_type;
    int mlen;
 
-   if (intel->gen >= 6)
+   if (brw->gen >= 6)
       offset /= 16;
 
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
@@ -1909,8 +2030,8 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
     */
    {
       brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
 
       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
@@ -1926,17 +2047,18 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
 
    {
       struct brw_reg dest;
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
       int send_commit_msg;
       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);
 
-      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
-        insn->header.compression_control = BRW_COMPRESSION_NONE;
+      if (brw_inst_qtr_control(brw, insn) != BRW_COMPRESSION_NONE) {
+         brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
         src_header = vec16(src_header);
       }
-      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
-      insn->header.destreg__conditionalmod = mrf.nr;
+      assert(brw_inst_pred_control(brw, insn) == BRW_PREDICATE_NONE);
+      if (brw->gen < 6)
+         brw_inst_set_base_mrf(brw, insn, mrf.nr);
 
       /* Until gen6, writes followed by reads from the same location
        * are not guaranteed to be ordered unless write_commit is set.
@@ -1948,7 +2070,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
        * protection.  Our use of DP writes is all about register
        * spilling within a thread.
        */
-      if (intel->gen >= 6) {
+      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
       } else {
@@ -1957,13 +2079,13 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
       }
 
       brw_set_dest(p, insn, dest);
-      if (intel->gen >= 6) {
+      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
       } else {
         brw_set_src0(p, insn, brw_null_reg());
       }
 
-      if (intel->gen >= 6)
+      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
       else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
@@ -1995,16 +2117,27 @@ brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
-                            GLuint offset)
+                            unsigned offset)
 {
-   struct intel_context *intel = &p->brw->intel;
+   struct brw_context *brw = p->brw;
    uint32_t msg_control;
    int rlen;
 
-   if (intel->gen >= 6)
+   if (brw->gen >= 6)
       offset /= 16;
 
-   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+   if (p->brw->gen >= 7) {
+      /* On gen 7 and above, we no longer have message registers and we can
+       * send from any register we want.  By using the destination register
+       * for the message, we guarantee that the implied message write won't
+       * accidentally overwrite anything.  This has been a problem because
+       * the MRF registers and source for the final FB write are both fixed
+       * and may overlap.
+       */
+      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
+   } else {
+      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+   }
    dest = retype(dest, BRW_REGISTER_TYPE_UW);
 
    if (num_regs == 1) {
@@ -2017,33 +2150,29 @@ brw_oword_block_read_scratch(struct brw_compile *p,
 
    {
       brw_push_insn_state(p);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
       /* set message header global offset field (reg 0, element 2) */
-      brw_MOV(p,
-             retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                 mrf.nr,
-                                 2), BRW_REGISTER_TYPE_UD),
-             brw_imm_ud(offset));
+      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
 
       brw_pop_insn_state(p);
    }
 
    {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
-      assert(insn->header.predicate_control == 0);
-      insn->header.compression_control = BRW_COMPRESSION_NONE;
-      insn->header.destreg__conditionalmod = mrf.nr;
+      assert(brw_inst_pred_control(brw, insn) == 0);
+      brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
 
       brw_set_dest(p, insn, dest);     /* UW? */
-      if (intel->gen >= 6) {
+      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
       } else {
         brw_set_src0(p, insn, brw_null_reg());
+         brw_inst_set_base_mrf(brw, insn, mrf.nr);
       }
 
       brw_set_dp_read_message(p,
@@ -2058,6 +2187,42 @@ brw_oword_block_read_scratch(struct brw_compile *p,
    }
 }
 
+void
+gen7_block_read_scratch(struct brw_compile *p,
+                        struct brw_reg dest,
+                        int num_regs,
+                        unsigned offset)
+{
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+   assert(brw_inst_pred_control(brw, insn) == BRW_PREDICATE_NONE);
+
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));
+
+   /* The HW requires that the header is present; this is to get the g0.5
+    * scratch offset.
+    */
+   brw_set_src0(p, insn, brw_vec8_grf(0, 0));
+
+   /* According to the docs, offset is "A 12-bit HWord offset into the memory
+    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
+    * is 32 bytes, which happens to be the size of a register.
+    */
+   offset /= REG_SIZE;
+   assert(offset < (1 << 12));
+
+   gen7_set_dp_scratch_message(p, insn,
+                               false, /* scratch read */
+                               false, /* OWords */
+                               false, /* invalidate after read */
+                               num_regs,
+                               offset,
+                               1,        /* mlen: just g0 */
+                               num_regs, /* rlen */
+                               true);    /* header present */
+}
+
 /**
  * Read a float[4] vector from the data port Data Cache (const buffer).
  * Location (in buffer) should be a multiple of 16.
@@ -2069,18 +2234,18 @@ void brw_oword_block_read(struct brw_compile *p,
                          uint32_t offset,
                          uint32_t bind_table_index)
 {
-   struct intel_context *intel = &p->brw->intel;
+   struct brw_context *brw = p->brw;
 
    /* On newer hardware, offset is in units of owords. */
-   if (intel->gen >= 6)
+   if (brw->gen >= 6)
       offset /= 16;
 
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
    brw_push_insn_state(p);
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
@@ -2091,17 +2256,17 @@ void brw_oword_block_read(struct brw_compile *p,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));
 
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-   insn->header.destreg__conditionalmod = mrf.nr;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
    /* cast dest to a uword[8] vector */
    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
 
    brw_set_dest(p, insn, dest);
-   if (intel->gen >= 6) {
+   if (brw->gen >= 6) {
       brw_set_src0(p, insn, mrf);
    } else {
       brw_set_src0(p, insn, brw_null_reg());
+      brw_inst_set_base_mrf(brw, insn, mrf.nr);
    }
 
    brw_set_dp_read_message(p,
@@ -2120,41 +2285,42 @@ void brw_oword_block_read(struct brw_compile *p,
 
 void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  GLuint msg_control,
-                  GLuint binding_table_index,
-                  GLuint msg_length,
-                  GLuint response_length,
+                  struct brw_reg payload,
+                  struct brw_reg implied_header,
+                  unsigned msg_control,
+                  unsigned binding_table_index,
+                  unsigned msg_length,
+                  unsigned response_length,
                   bool eot,
+                  bool last_render_target,
                   bool header_present)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
-   GLuint msg_type;
-   struct brw_reg dest;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
+   unsigned msg_type;
+   struct brw_reg dest, src0;
 
    if (dispatch_width == 16)
       dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
    else
       dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
 
-   if (intel->gen >= 6) {
+   if (brw->gen >= 6) {
       insn = next_insn(p, BRW_OPCODE_SENDC);
    } else {
       insn = next_insn(p, BRW_OPCODE_SEND);
    }
-   /* The execution mask is ignored for render target writes. */
-   insn->header.predicate_control = 0;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
 
-   if (intel->gen >= 6) {
+   if (brw->gen >= 6) {
       /* headerless version, just submit color payload */
-      src0 = brw_message_reg(msg_reg_nr);
+      src0 = payload;
 
       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
    } else {
-      insn->header.destreg__conditionalmod = msg_reg_nr;
+      assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
+      brw_inst_set_base_mrf(brw, insn, payload.nr);
+      src0 = implied_header;
 
       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
    }
@@ -2168,7 +2334,7 @@ void brw_fb_WRITE(struct brw_compile *p,
                            msg_type,
                            msg_length,
                            header_present,
-                           eot, /* last render target write */
+                           last_render_target,
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
@@ -2182,27 +2348,43 @@ void brw_fb_WRITE(struct brw_compile *p,
  */
 void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
-               GLuint msg_reg_nr,
+               unsigned msg_reg_nr,
                struct brw_reg src0,
-               GLuint binding_table_index,
-               GLuint sampler,
-               GLuint msg_type,
-               GLuint response_length,
-               GLuint msg_length,
-               GLuint header_present,
-               GLuint simd_mode,
-               GLuint return_format)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+               unsigned binding_table_index,
+               unsigned sampler,
+               unsigned msg_type,
+               unsigned response_length,
+               unsigned msg_length,
+               unsigned header_present,
+               unsigned simd_mode,
+               unsigned return_format)
+{
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
-   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+   if (msg_reg_nr != -1)
+      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
-   insn->header.predicate_control = 0; /* XXX */
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   if (intel->gen < 6)
-      insn->header.destreg__conditionalmod = msg_reg_nr;
+   brw_inst_set_pred_control(brw, insn, BRW_PREDICATE_NONE); /* XXX */
+
+   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
+    *
+    *    "Instruction compression is not allowed for this instruction (that
+    *     is, send). The hardware behavior is undefined if this instruction is
+    *     set as compressed. However, compress control can be set to "SecHalf"
+    *     to affect the EMask generation."
+    *
+    * No similar wording is found in later PRMs, but there are examples
+    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
+    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
+    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
+    */
+   if (brw_inst_qtr_control(brw, insn) != BRW_COMPRESSION_2NDHALF)
+      brw_inst_set_qtr_control(brw, insn, BRW_COMPRESSION_NONE);
+
+   if (brw->gen < 6)
+      brw_inst_set_base_mrf(brw, insn, msg_reg_nr);
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
@@ -2217,33 +2399,76 @@ void brw_SAMPLE(struct brw_compile *p,
                            return_format);
 }
 
+/* Adjust the message header's sampler state pointer to
+ * select the correct group of 16 samplers.
+ */
+void brw_adjust_sampler_state_pointer(struct brw_compile *p,
+                                      struct brw_reg header,
+                                      struct brw_reg sampler_index)
+{
+   /* The "Sampler Index" field can only store values between 0 and 15.
+    * However, we can add an offset to the "Sampler State Pointer"
+    * field, effectively selecting a different set of 16 samplers.
+    *
+    * The "Sampler State Pointer" needs to be aligned to a 32-byte
+    * offset, and each sampler state is only 16-bytes, so we can't
+    * exclusively use the offset - we have to use both.
+    */
+
+   struct brw_context *brw = p->brw;
+
+   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
+      const int sampler_state_size = 16; /* 16 bytes */
+      uint32_t sampler = sampler_index.dw1.ud;
+
+      if (sampler >= 16) {
+         assert(brw->is_haswell || brw->gen >= 8);
+         brw_ADD(p,
+                 get_element_ud(header, 3),
+                 get_element_ud(brw_vec8_grf(0, 0), 3),
+                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
+      }
+   } else {
+      /* Non-const sampler array indexing case */
+      if (brw->gen < 8 && !brw->is_haswell) {
+         return;
+      }
+
+      struct brw_reg temp = get_element_ud(header, 3);
+
+      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
+      brw_SHL(p, temp, temp, brw_imm_ud(4));
+      brw_ADD(p,
+              get_element_ud(header, 3),
+              get_element_ud(brw_vec8_grf(0, 0), 3),
+              temp);
+   }
+}
+
 /* All these variables are pretty confusing - we might be better off
  * using bitmasks and macros for this, in the old style.  Or perhaps
  * just having the caller instantiate the fields in dword3 itself.
  */
 void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
-                  GLuint msg_reg_nr,
+                  unsigned msg_reg_nr,
                   struct brw_reg src0,
-                  bool allocate,
-                  bool used,
-                  GLuint msg_length,
-                  GLuint response_length,
-                  bool eot,
-                  bool writes_complete,
-                  GLuint offset,
-                  GLuint swizzle)
+                   enum brw_urb_write_flags flags,
+                  unsigned msg_length,
+                  unsigned response_length,
+                  unsigned offset,
+                  unsigned swizzle)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
-   if (intel->gen == 7) {
+   if (brw->gen >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
       brw_push_insn_state(p);
-      brw_set_access_mode(p, BRW_ALIGN_1);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
                retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
@@ -2259,47 +2484,36 @@ void brw_urb_WRITE(struct brw_compile *p,
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, brw_imm_d(0));
 
-   if (intel->gen < 6)
-      insn->header.destreg__conditionalmod = msg_reg_nr;
+   if (brw->gen < 6)
+      brw_inst_set_base_mrf(brw, insn, msg_reg_nr);
 
    brw_set_urb_message(p,
                       insn,
-                      allocate,
-                      used,
+                      flags,
                       msg_length,
-                      response_length, 
-                      eot, 
-                      writes_complete, 
+                      response_length,
                       offset,
                       swizzle);
 }
 
 static int
-next_ip(struct brw_compile *p, int ip)
+brw_find_next_block_end(struct brw_compile *p, int start_offset)
 {
-   struct brw_instruction *insn = (void *)p->store + ip;
-
-   if (insn->header.cmpt_control)
-      return ip + 8;
-   else
-      return ip + 16;
-}
-
-static int
-brw_find_next_block_end(struct brw_compile *p, int start)
-{
-   int ip;
+   int offset;
    void *store = p->store;
+   const struct brw_context *brw = p->brw;
 
-   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
-      struct brw_instruction *insn = store + ip;
+   for (offset = next_offset(brw, store, start_offset);
+        offset < p->next_insn_offset;
+        offset = next_offset(brw, store, offset)) {
+      brw_inst *insn = store + offset;
 
-      switch (insn->header.opcode) {
+      switch (brw_inst_opcode(brw, insn)) {
       case BRW_OPCODE_ENDIF:
       case BRW_OPCODE_ELSE:
       case BRW_OPCODE_WHILE:
       case BRW_OPCODE_HALT:
-        return ip;
+        return offset;
       }
    }
 
@@ -2311,28 +2525,32 @@ brw_find_next_block_end(struct brw_compile *p, int start)
  * instruction.
  */
 static int
-brw_find_loop_end(struct brw_compile *p, int start)
+brw_find_loop_end(struct brw_compile *p, int start_offset)
 {
-   struct intel_context *intel = &p->brw->intel;
-   int ip;
-   int scale = 8;
+   struct brw_context *brw = p->brw;
+   int offset;
+   int scale = 16 / brw_jump_scale(brw);
    void *store = p->store;
 
+   assert(brw->gen >= 6);
+
    /* Always start after the instruction (such as a WHILE) we're trying to fix
     * up.
     */
-   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
-      struct brw_instruction *insn = store + ip;
-
-      if (insn->header.opcode == BRW_OPCODE_WHILE) {
-        int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
-                                  : insn->bits3.break_cont.jip;
-        if (ip + jip * scale <= start)
-           return ip;
+   for (offset = next_offset(brw, store, start_offset);
+        offset < p->next_insn_offset;
+        offset = next_offset(brw, store, offset)) {
+      brw_inst *insn = store + offset;
+
+      if (brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE) {
+         int jip = brw->gen == 6 ? brw_inst_gen6_jump_count(brw, insn)
+                                 : brw_inst_jip(brw, insn);
+        if (offset + jip * scale <= start_offset)
+           return offset;
       }
    }
    assert(!"not reached");
-   return start;
+   return start_offset;
 }
 
 /* After program generation, go back and update the UIP and JIP of
@@ -2341,51 +2559,56 @@ brw_find_loop_end(struct brw_compile *p, int start)
 void
 brw_set_uip_jip(struct brw_compile *p)
 {
-   struct intel_context *intel = &p->brw->intel;
-   int ip;
-   int scale = 8;
+   struct brw_context *brw = p->brw;
+   int offset;
+   int br = brw_jump_scale(brw);
+   int scale = 16 / br;
    void *store = p->store;
 
-   if (intel->gen < 6)
+   if (brw->gen < 6)
       return;
 
-   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
-      struct brw_instruction *insn = store + ip;
+   for (offset = 0; offset < p->next_insn_offset;
+        offset = next_offset(brw, store, offset)) {
+      brw_inst *insn = store + offset;
 
-      if (insn->header.cmpt_control) {
+      if (brw_inst_cmpt_control(brw, insn)) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
-        assert(insn->header.opcode != BRW_OPCODE_BREAK &&
-               insn->header.opcode != BRW_OPCODE_CONTINUE &&
-               insn->header.opcode != BRW_OPCODE_HALT);
+         assert(brw_inst_opcode(brw, insn) != BRW_OPCODE_BREAK &&
+                brw_inst_opcode(brw, insn) != BRW_OPCODE_CONTINUE &&
+                brw_inst_opcode(brw, insn) != BRW_OPCODE_HALT);
         continue;
       }
 
-      int block_end_ip = brw_find_next_block_end(p, ip);
-      switch (insn->header.opcode) {
+      int block_end_offset = brw_find_next_block_end(p, offset);
+      switch (brw_inst_opcode(brw, insn)) {
       case BRW_OPCODE_BREAK:
-         assert(block_end_ip != 0);
-        insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+         assert(block_end_offset != 0);
+         brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale);
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
-        insn->bits3.break_cont.uip =
-           (brw_find_loop_end(p, ip) - ip +
-             (intel->gen == 6 ? 16 : 0)) / scale;
+         brw_inst_set_uip(brw, insn,
+           (brw_find_loop_end(p, offset) - offset +
+             (brw->gen == 6 ? 16 : 0)) / scale);
         break;
       case BRW_OPCODE_CONTINUE:
-         assert(block_end_ip != 0);
-        insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
-        insn->bits3.break_cont.uip =
-            (brw_find_loop_end(p, ip) - ip) / scale;
+         assert(block_end_offset != 0);
+         brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale);
+         brw_inst_set_uip(brw, insn,
+            (brw_find_loop_end(p, offset) - offset) / scale);
 
-        assert(insn->bits3.break_cont.uip != 0);
-        assert(insn->bits3.break_cont.jip != 0);
+         assert(brw_inst_uip(brw, insn) != 0);
+         assert(brw_inst_jip(brw, insn) != 0);
         break;
 
-      case BRW_OPCODE_ENDIF:
-         if (block_end_ip == 0)
-            insn->bits3.break_cont.jip = 2;
+      case BRW_OPCODE_ENDIF: {
+         int32_t jump = (block_end_offset == 0) ?
+                        1 * br : (block_end_offset - offset) / scale;
+         if (brw->gen >= 7)
+            brw_inst_set_jip(brw, insn, jump);
          else
-            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+            brw_inst_set_gen6_jump_count(brw, insn, jump);
         break;
+      }
 
       case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
@@ -2399,13 +2622,13 @@ brw_set_uip_jip(struct brw_compile *p)
          * The uip will have already been set by whoever set up the
          * instruction.
          */
-        if (block_end_ip == 0) {
-           insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
+        if (block_end_offset == 0) {
+            brw_inst_set_jip(brw, insn, brw_inst_uip(brw, insn));
         } else {
-           insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+            brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale);
         }
-        assert(insn->bits3.break_cont.uip != 0);
-        assert(insn->bits3.break_cont.jip != 0);
+         assert(brw_inst_uip(brw, insn) != 0);
+         assert(brw_inst_jip(brw, insn) != 0);
         break;
       }
    }
@@ -2413,14 +2636,14 @@ brw_set_uip_jip(struct brw_compile *p)
 
 void brw_ff_sync(struct brw_compile *p,
                   struct brw_reg dest,
-                  GLuint msg_reg_nr,
+                  unsigned msg_reg_nr,
                   struct brw_reg src0,
                   bool allocate,
-                  GLuint response_length,
+                  unsigned response_length,
                   bool eot)
 {
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+   struct brw_context *brw = p->brw;
+   brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
@@ -2429,8 +2652,8 @@ void brw_ff_sync(struct brw_compile *p,
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, brw_imm_d(0));
 
-   if (intel->gen < 6)
-      insn->header.destreg__conditionalmod = msg_reg_nr;
+   if (brw->gen < 6)
+      brw_inst_set_base_mrf(brw, insn, msg_reg_nr);
 
    brw_set_ff_sync_message(p,
                           insn,
@@ -2453,12 +2676,12 @@ void brw_ff_sync(struct brw_compile *p,
 void
 brw_svb_write(struct brw_compile *p,
               struct brw_reg dest,
-              GLuint msg_reg_nr,
+              unsigned msg_reg_nr,
               struct brw_reg src0,
-              GLuint binding_table_index,
+              unsigned binding_table_index,
               bool   send_commit_msg)
 {
-   struct brw_instruction *insn;
+   brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
@@ -2478,6 +2701,160 @@ brw_svb_write(struct brw_compile *p,
                             send_commit_msg); /* send_commit_msg */
 }
 
+static void
+brw_set_dp_untyped_atomic_message(struct brw_compile *p,
+                                  brw_inst *insn,
+                                  unsigned atomic_op,
+                                  unsigned bind_table_index,
+                                  unsigned msg_length,
+                                  unsigned response_length,
+                                  bool header_present)
+{
+   const struct brw_context *brw = p->brw;
+
+   unsigned msg_control =
+      atomic_op | /* Atomic Operation Type: BRW_AOP_* */
+      (response_length ? 1 << 5 : 0); /* Return data expected */
+
+   if (brw->gen >= 8 || brw->is_haswell) {
+      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+
+      if (brw_inst_access_mode(brw, insn) == BRW_ALIGN_1) {
+         if (brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16)
+            msg_control |= 1 << 4; /* SIMD8 mode */
+
+         brw_inst_set_dp_msg_type(brw, insn,
+                                  HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP);
+      } else {
+         brw_inst_set_dp_msg_type(brw, insn,
+            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2);
+      }
+   } else {
+      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      brw_inst_set_dp_msg_type(brw, insn, GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);
+
+      if (brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16)
+         msg_control |= 1 << 4; /* SIMD8 mode */
+   }
+
+   brw_inst_set_binding_table_index(brw, insn, bind_table_index);
+   brw_inst_set_dp_msg_control(brw, insn, msg_control);
+}
+
+void
+brw_untyped_atomic(struct brw_compile *p,
+                   struct brw_reg dest,
+                   struct brw_reg payload,
+                   unsigned atomic_op,
+                   unsigned bind_table_index,
+                   unsigned msg_length,
+                   unsigned response_length) {
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, insn, brw_imm_d(0));
+   brw_set_dp_untyped_atomic_message(
+      p, insn, atomic_op, bind_table_index, msg_length, response_length,
+      brw_inst_access_mode(brw, insn) == BRW_ALIGN_1);
+}
+
+static void
+brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
+                                        brw_inst *insn,
+                                        unsigned bind_table_index,
+                                        unsigned msg_length,
+                                        unsigned response_length,
+                                        bool header_present)
+{
+   const struct brw_context *brw = p->brw;
+   const unsigned dispatch_width =
+      (brw_inst_exec_size(brw, insn) == BRW_EXECUTE_16 ? 16 : 8);
+   const unsigned num_channels = response_length / (dispatch_width / 8);
+
+   if (brw->gen >= 8 || brw->is_haswell) {
+      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      brw_inst_set_dp_msg_type(brw, insn,
+                               HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ);
+   } else {
+      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      brw_inst_set_dp_msg_type(brw, insn,
+                               GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ);
+   }
+
+   /* Set mask of 32-bit channels to drop. */
+   unsigned msg_control = (0xf & (0xf << num_channels));
+
+   if (brw_inst_access_mode(brw, insn) == BRW_ALIGN_1) {
+      if (dispatch_width == 16)
+         msg_control |= 1 << 4; /* SIMD16 mode */
+      else
+         msg_control |= 2 << 4; /* SIMD8 mode */
+   }
+
+   brw_inst_set_binding_table_index(brw, insn, bind_table_index);
+   brw_inst_set_dp_msg_control(brw, insn, msg_control);
+}
+
+void
+brw_untyped_surface_read(struct brw_compile *p,
+                         struct brw_reg dest,
+                         struct brw_reg mrf,
+                         unsigned bind_table_index,
+                         unsigned msg_length,
+                         unsigned response_length)
+{
+   const struct brw_context *brw = p->brw;
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+   brw_set_dp_untyped_surface_read_message(
+      p, insn, bind_table_index, msg_length, response_length,
+      brw_inst_access_mode(brw, insn) == BRW_ALIGN_1);
+}
+
+void
+brw_pixel_interpolator_query(struct brw_compile *p,
+                             struct brw_reg dest,
+                             struct brw_reg mrf,
+                             bool noperspective,
+                             unsigned mode,
+                             unsigned data,
+                             unsigned msg_length,
+                             unsigned response_length)
+{
+   const struct brw_context *brw = p->brw;
+   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, dest);
+   brw_set_src0(p, insn, mrf);
+   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
+                              msg_length, response_length,
+                              false /* header is never present for PI */,
+                              false);
+
+   brw_inst_set_pi_simd_mode(
+         brw, insn, brw_inst_exec_size(brw, insn) == BRW_EXECUTE_16);
+   brw_inst_set_pi_slot_group(brw, insn, 0); /* zero unless 32/64px dispatch */
+   brw_inst_set_pi_nopersp(brw, insn, noperspective);
+   brw_inst_set_pi_message_type(brw, insn, mode);
+   brw_inst_set_pi_message_data(brw, insn, data);
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
@@ -2498,13 +2875,12 @@ void brw_shader_time_add(struct brw_compile *p,
                          struct brw_reg payload,
                          uint32_t surf_index)
 {
-   struct intel_context *intel = &p->brw->intel;
-   assert(intel->gen >= 7);
+   assert(p->brw->gen >= 7);
 
    brw_push_insn_state(p);
-   brw_set_access_mode(p, BRW_ALIGN_1);
-   brw_set_mask_control(p, BRW_MASK_DISABLE);
-   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_pop_insn_state(p);
 
    /* We use brw_vec1_reg and unmasked because we want to increment the given
@@ -2514,25 +2890,8 @@ void brw_shader_time_add(struct brw_compile *p,
                                       BRW_ARF_NULL, 0));
    brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                       payload.nr, 0));
-
-   uint32_t sfid, msg_type;
-   if (intel->is_haswell) {
-      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
-      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
-   } else {
-      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
-   }
-
-   bool header_present = false;
-   bool eot = false;
-   uint32_t mlen = 2; /* offset, value */
-   uint32_t rlen = 0;
-   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);
-
-   send->bits3.ud |= msg_type << 14;
-   send->bits3.ud |= 0 << 13; /* no return data */
-   send->bits3.ud |= 1 << 12; /* SIMD8 mode */
-   send->bits3.ud |= BRW_AOP_ADD << 8;
-   send->bits3.ud |= surf_index << 0;
+   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
+                                     2 /* message length */,
+                                     0 /* response length */,
+                                     false /* header present */);
 }